def upload(workspace: str, graph: str) -> Any:
    """
    Store a d3 json-encoded graph into the database as a node and an edge table.

    `workspace` - the target workspace
    `graph` - the target graph
    `data` - the json data, passed in the request body. The json data should
    contain nodes: [] and links: []

    Raises `AlreadyExists` when the workspace already has a graph with this
    name, and `ValidationFailed` when `validate_d3_json` reports any errors.

    Returns a dict holding the number of nodes (`nodecount`) and the number of
    links (`edgecount`) found in the request body.
    """
    target = Workspace(workspace)
    if target.has_graph(graph):
        raise AlreadyExists("graph", graph)

    # Parse the request body, keeping the key order of every json object.
    payload = json.loads(decode_data(request.data), object_pairs_hook=OrderedDict)

    # Reject the upload if the structure check reports anything.
    problems = validate_d3_json(payload)
    if len(problems) > 0:
        raise ValidationFailed(problems)

    node_table_name = f"{graph}_nodes"
    edge_table_name = f"{graph}_links"

    # Rename the d3 fields to their arango counterparts: `id` becomes `_key`,
    # and `source` / `target` become `_from` / `_to` references into the node
    # table.
    nodes = payload["nodes"]
    for entry in nodes:
        entry["_key"] = str(entry.pop("id"))

    links = payload["links"]
    for entry in links:
        source = entry.pop("source")
        target_id = entry.pop("target")
        entry["_from"] = f"{node_table_name}/{source}"
        entry["_to"] = f"{node_table_name}/{target_id}"

    # Reuse the node and edge tables when they exist, otherwise create them.
    node_table = (
        target.table(node_table_name)
        if target.has_table(node_table_name)
        else target.create_table(node_table_name, edge=False)
    )
    edge_table = (
        target.table(edge_table_name)
        if target.has_table(edge_table_name)
        else target.create_table(edge_table_name, edge=True)
    )

    # Store the rows, then tie the tables together as a graph.
    node_table.insert(nodes)
    edge_table.insert(links)
    target.create_graph(graph, edge_table_name)

    return {"nodecount": len(nodes), "edgecount": len(links)}
def upload(workspace: str, graph: str) -> Any:
    """
    Store a nested_json tree into the database in coordinated node and edge tables.

    `workspace` - the target workspace.
    `graph` - the target graph.
    `data` - the nested_json data, passed in the request body.

    Raises `AlreadyExists` when the workspace already has a graph with this
    name.

    Returns a dict with the number of edges (`edgecount`), internal nodes
    (`int_nodecount`) and leaf nodes (`leaf_nodecount`) that were inserted.
    """
    target = Workspace(workspace)
    if target.has_graph(graph):
        raise AlreadyExists("graph", graph)

    # The request body is decoded as utf8 text and handed on unparsed.
    raw_tree = request.data.decode("utf8")

    edge_name = f"{graph}_edges"
    internal_name = f"{graph}_internal_nodes"
    leaf_name = f"{graph}_leaf_nodes"

    def existing_or_new(name: str, is_edge: bool) -> Any:
        # Fetch the table when the workspace has it; otherwise create it.
        if target.has_table(name):
            return target.table(name)
        return target.create_table(name, edge=is_edge)

    edge_table = existing_or_new(edge_name, True)
    internal_table = existing_or_new(internal_name, False)
    leaf_table = existing_or_new(leaf_name, False)

    # Split the nested_json data into node rows and edge rows. `nodes` is
    # indexed below as [internal nodes, leaf nodes].
    nodes, edges = analyze_nested_json(raw_tree, internal_name, leaf_name)

    # Upload edges first, then internal nodes, then leaf nodes.
    edge_table.insert(edges)
    internal_table.insert(nodes[0])
    leaf_table.insert(nodes[1])

    target.create_graph(graph, edge_name)

    return {
        "edgecount": len(edges),
        "int_nodecount": len(nodes[0]),
        "leaf_nodecount": len(nodes[1]),
    }
def download(workspace: str, table: str) -> Any:
    """
    Download a table from the database as a CSV file.

    `workspace` - the target workspace
    `table` - the target table

    Raises `NotFound` when the workspace has no table with this name.

    Returns a `Response` whose body comes from a generator that yields the
    CSV header line first and then one CSV line per row, with headers set so
    that the result is offered as an attachment named `<table>.csv`.
    """
    loaded_workspace = Workspace(workspace)
    if not loaded_workspace.has_table(table):
        raise NotFound("table", table)

    loaded_table = loaded_workspace.table(table)
    table_rows = loaded_table.rows()["rows"]
    fields = loaded_table.headers()

    def csv_row_generator() -> Generator[str, None, None]:
        # One buffer and one writer serve the whole download; building a new
        # pair for every row is loop-invariant work.
        buffer = StringIO()
        writer = csv.DictWriter(buffer, fieldnames=fields)

        def take_line() -> str:
            # Hand back what the writer just produced and empty the buffer so
            # the next yield carries only the next line.
            text = buffer.getvalue()
            buffer.seek(0)
            buffer.truncate()
            return text

        writer.writeheader()
        yield take_line()

        for csv_row in generate_filtered_docs(table_rows):
            writer.writerow(csv_row)
            yield take_line()

    response = Response(csv_row_generator(), mimetype="text/csv")
    response.headers["Content-Disposition"] = f"attachment; filename={table}.csv"
    response.headers["Content-type"] = "text/csv"

    return response
def upload(
    workspace: str, table: str, key: str = "_key", overwrite: bool = False
) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the CSV field to use as the table key (defaults to `_key`)
    `overwrite` - passed to `validate_csv` together with `key`
    `data` - the CSV data, passed in the request body.

    If the CSV data contains `_from` and `_to` fields, it will be treated as an
    edge table.

    Raises `AlreadyExists` when the workspace already has a table with this
    name, and `CSVReadError` when the csv module cannot parse the body.

    Returns a dict with the number of insert results (`count`).
    """
    loaded_workspace = Workspace(workspace)
    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        rows: List[Dict[str, str]] = list(csv.DictReader(StringIO(body)))
    except csv.Error as err:
        # Chain explicitly so the traceback shows the parser failure as the
        # direct cause instead of a secondary error in the handler.
        raise CSVReadError() from err

    # Perform validation.
    validate_csv(rows, key, overwrite)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"   # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Check if it's an edge table or not
    # NOTE(review): rows[0] assumes at least one data row; presumably
    # validate_csv rejects an empty upload before this point - confirm.
    fieldnames = rows[0].keys()
    edges = "_from" in fieldnames and "_to" in fieldnames

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edge=edges)
    results = loaded_table.insert(rows)

    return {"count": len(results)}
def upload(workspace: str, graph: str) -> Any:
    """
    Store a newick tree into the database in coordinated node and edge tables.

    `workspace` - the target workspace.
    `graph` - the target graph.
    `data` - the newick data, passed in the request body.

    Only the first tree of the parsed newick data is walked. A node without a
    name is stored under a freshly generated uuid hex string.

    Raises `AlreadyExists` when the workspace already has a graph with this
    name.

    Returns a dict with the number of edges inserted (`edgecount`) and the
    number of nodes visited (`nodecount`).
    """
    app.logger.info("newick tree")

    target = Workspace(workspace)
    if target.has_graph(graph):
        raise AlreadyExists("graph", graph)

    parsed = newick.loads(decode_data(request.data))
    validate_newick(parsed)

    edge_name = f"{graph}_edges"
    node_name = f"{graph}_nodes"

    # Note that edge=True must be set when creating the edge table, or the
    # _from and _to keys will be ignored below.
    edge_table = (
        target.table(edge_name)
        if target.has_table(edge_name)
        else target.create_table(edge_name, edge=True)
    )
    node_table = (
        target.table(node_name)
        if target.has_table(node_name)
        else target.create_table(node_name, edge=False)
    )

    tally = {"edgecount": 0, "nodecount": 0}

    def walk(parent_key: Optional[str], current: newick.Node) -> None:
        key = current.name or uuid.uuid4().hex

        # Insert the node only when the table has no row under this key; the
        # node is counted either way.
        if not node_table.row(key):
            node_table.insert([{"_key": key}])
        tally["nodecount"] += 1

        # Descend first; the edge to the parent is written after the subtree.
        for child in current.descendants:
            walk(key, child)

        # The root is visited with no parent and therefore gets no edge.
        if not parent_key:
            return

        edge_row = {
            "_from": f"{node_name}/{parent_key}",
            "_to": f"{node_name}/{key}",
            "length": current.length,
        }
        edge_table.insert([edge_row])
        tally["edgecount"] += 1

    walk(None, parsed[0])

    target.create_graph(graph, edge_name)

    return {"edgecount": tally["edgecount"], "nodecount": tally["nodecount"]}
def upload(
    workspace: str,
    table: str,
    key: str = "_key",
    overwrite: bool = False,
    metadata: Optional[str] = None,
) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the CSV field to use as the table key (defaults to `_key`)
    `overwrite` - passed to `validate_csv` together with `key`
    `metadata` - optional JSON-encoded table metadata; when omitted or empty,
    an empty metadata dict is used
    `data` - the CSV data, passed in the request body.

    If the CSV data contains `_from` and `_to` fields, it will be treated as an
    edge table.

    Raises `AlreadyExists` when the workspace already has a table with this
    name, `CSVReadError` when the csv module cannot parse the body,
    `BadQueryArgument` when `metadata` is not valid JSON, and
    `ValidationFailed` when row processing or CSV validation reports errors.

    Returns a dict with the number of insert results (`count`).
    """
    loaded_workspace = Workspace(workspace)
    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        csv_rows: List[UnprocessedTableRow] = list(csv.DictReader(StringIO(body)))
    except csv.Error as err:
        # Chain explicitly so the traceback shows the parser failure as the
        # direct cause instead of a secondary error in the handler.
        raise CSVReadError() from err

    # TODO: This temporarily needs to be done here, so that validation of the metadata
    # can be done before the table is actually created. Once the API is updated, this
    # will change.
    # https://github.com/multinet-app/multinet-server/issues/493
    metadata_dict = {}
    if metadata:
        try:
            metadata_dict = json.loads(metadata)
        except json.decoder.JSONDecodeError as err:
            raise BadQueryArgument("metadata", metadata) from err

    table_metadata = table_metadata_from_dict(metadata_dict)
    rows, metadata_validation_errors = process_rows(csv_rows, table_metadata.columns)

    # Perform validation. Both error lists are merged so that a single
    # ValidationFailed carries every problem found.
    csv_validation_errors = validate_csv(rows, key, overwrite)

    validation_errors = [*metadata_validation_errors, *csv_validation_errors]
    if validation_errors:
        raise ValidationFailed(errors=validation_errors)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"   # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edge=is_edge_table(rows))

    # Set table metadata
    loaded_table.set_metadata(metadata_dict)

    results = loaded_table.insert(rows)

    return {"count": len(results)}