def tablize(self, data):
    """
    Convert a list of data into a table3.ObjectTable.

    Tries the fast path first (handles lists of possibly-nested dicts) and
    falls back to the flattening code path when fast_tablize rejects the
    input with a ValueError.
    """
    # TODO: [WvA] do we actually need the old codepath, i.e. do we ever have nested lists?
    try:
        return self.fast_tablize(data)
    except ValueError:
        pass

    # Flatten the data to a list of one-level-deep dicts; each key names
    # the column that the value falls into.
    data = self.flatten_data(data)

    # Collect the unique column names (first-seen order) together with the
    # set of value types observed in each column.
    columns = OrderedDict()
    for row in data:
        for name, value in row.items():
            columns.setdefault(name, set()).add(type(value))

    table = table3.ObjectTable(rows=data)
    for name, types in columns.items():
        # A column gets a concrete fieldtype only when all values agree;
        # bool and NoneType are rendered as str.
        ftype = next(iter(types)) if len(types) == 1 else None
        ftype = {bool: str, type(None): str}.get(ftype, ftype)
        table.add_column(label=name,
                         col=partial(lambda key, item: item.get(key, None), name),
                         fieldtype=ftype)
    return table
def get_table(self, codingjobs, export_level, **kargs): codingjobs = CodingJob.objects.prefetch_related("coded_articles__codings__values").filter(pk__in=codingjobs) # Get all row of table self.progress_monitor.update(5, "Preparing Jobs") rows = list(_get_rows( codingjobs, include_sentences=(int(export_level) != CODING_LEVEL_ARTICLE), include_multiple=True, include_uncoded_articles=False, progress_monitor=self.progress_monitor )) table = table3.ObjectTable(rows=rows) self.progress_monitor.update(5, "Preparing columns") # Meta field columns for field in _METAFIELDS: if self.options.get("meta_{field.object}_{field.attr}".format(**locals())): if field.object == "subsentence": table.addColumn(SubSentenceColumn(field)) else: table.addColumn(MetaColumn(field)) # Build columns based on form schemafields for schemafield in self.bound_form.schemafields: prefix = _get_field_prefix(schemafield) if self.options[prefix+"_included"]: options = {k[len(prefix)+1:] :v for (k,v) in self.options.iteritems() if k.startswith(prefix)} for label, function in schemafield.serialiser.get_export_columns(**options): table.addColumn(CodingColumn(schemafield, label, function)) return table
def get_table(self, assocTable, rowheader_label="interval", rowheader_type=str, cell_type=float):
    """
    Pivot an association table into a table3.ObjectTable.

    assocTable: iterable of (interval, x, y, score) tuples.
        NOTE(review): it is iterated three times, so it must be re-iterable
        (a list/set, not a generator) — confirm against callers.
    Rows are the sorted intervals; one column per distinct (x, y) pair,
    labelled "x\u2192y", whose cells look up the score via getscore.
    """
    intervals = sorted({i for (i, q, q2, p) in assocTable})
    assocs = {(x, y) for (i, x, y, s) in assocTable}
    cols = {u"{x}\u2192{y}".format(x=x, y=y): (x, y) for (x, y) in assocs}
    scores = {(i, x, y): s for (i, x, y, s) in assocTable}
    # (removed unused local: colnames = sorted(cols))
    result = table3.ObjectTable(rows=intervals)
    # First column: the row header, i.e. the interval itself.
    result.addColumn(
        table3.ObjectColumn(label=rowheader_label, cellfunc=lambda row: row,
                            fieldtype=rowheader_type))
    # One score column per association, in sorted label order.
    for col, (x, y) in sorted(cols.iteritems()):
        result.addColumn(
            table3.ObjectColumn(label=unicode(col), fieldtype=cell_type,
                                cellfunc=partial(getscore, scores, x, y)))
    return result
def tablize(self, data):
    """
    Convert a list of data into a table.
    """
    # Flatten the data to a list of one-level-deep dicts; each key names
    # the column that the value falls into.
    data = self.flatten_data(data)

    # Record, per column name (first-seen order), the set of value types
    # observed in that column.
    columns = OrderedDict()
    for row in data:
        for name, value in row.iteritems():
            columns.setdefault(name, set()).add(type(value))

    table = table3.ObjectTable(rows=data)
    for name in columns:
        types = columns[name]
        # Only a uniform column gets a concrete fieldtype; bool and
        # NoneType are rendered as str.
        ftype = list(types)[0] if len(types) == 1 else None
        ftype = {bool: str, type(None): str}.get(ftype, ftype)
        table.addColumn(label=name,
                        col=partial(lambda key, item: item.get(key, None), name),
                        fieldtype=ftype)
    return table
def parent_table(codebook):
    """Return a table of the codebook's codes with uuid, code_id, code and parent columns."""
    result = table3.ObjectTable(rows=codebook.codebookcodes)
    for cellfunc, label in (
            (lambda row: row.code.uuid, "uuid"),
            (lambda row: row.code.id, "code_id"),
            (lambda row: row.code, "code"),
            (lambda row: row.parent, "parent")):
        result.addColumn(cellfunc, label=label)
    return result
def parent_table(self, codebook, labelcols):
    """
    Return a flat table of the codebook's codes: uuid, code_id, parent_id
    and label columns, plus any extra label columns added by
    add_label_columns.
    """
    # NOTE(review): labelcols is not used directly here; presumably
    # add_label_columns picks up the same information from self — confirm.
    result = table3.ObjectTable(rows=codebook.codebookcodes)
    for label, cellfunc in (
            ("uuid", lambda row: str(row.code.uuid)),
            ("code_id", lambda row: row.code.id),
            ("parent_id", lambda row: getattr(row.parent, "id", None)),
            ("label", lambda row: row.code.label)):
        result.add_column(cellfunc, label=label)
    self.add_label_columns(result)
    return result
def get_table(self, codingjobs, export_level, include_uncoded_sentences=False, include_uncoded_articles=False, **kargs):
    """
    Build a table3.ObjectTable with one row per coding and one column per
    selected meta field, aggregation mapping and schema field.

    codingjobs: primary keys of the CodingJobs to export.
    export_level: sentence rows are included unless this equals
        CODING_LEVEL_ARTICLE.
    include_uncoded_sentences / include_uncoded_articles: also emit rows
        for units without codings.
    kargs: must contain "date_format" when a date meta column is selected.
    """
    codingjobs = CodingJob.objects.prefetch_related("coded_articles__codings__values").filter(pk__in=codingjobs)

    # Get all rows of the table
    self.progress_monitor.update(5, "Preparing Jobs")
    rows = list(_get_rows(
        codingjobs, include_sentences=(int(export_level) != CODING_LEVEL_ARTICLE),
        include_multiple=True, include_uncoded_articles=include_uncoded_articles,
        include_uncoded_sentences=include_uncoded_sentences,
        progress_monitor=self.progress_monitor
    ))

    table = table3.ObjectTable(rows=rows)
    self.progress_monitor.update(5, "Preparing columns")

    # Meta field columns, selected via "meta_<object>_<attr>" options.
    for field in _METAFIELDS:
        if self.options.get("meta_{field.object}_{field.attr}".format(**locals())):
            if field.object == "subsentence":
                table.addColumn(SubSentenceColumn(field))
            elif field.attr == "date":
                # Dates honour the caller-supplied strftime format.
                table.addColumn(DateColumn(field.label, kargs["date_format"]))
            else:
                table.addColumn(MetaColumn(field))

    # Date formatting (also belongs to meta): one extra date column per
    # selected predefined format.
    for id, label, strftime in DATE_FORMATS:
        if self.options.get("meta_{id}".format(id=id)):
            table.addColumn(DateColumn(label, strftime))

    # Aggregation columns: map article field values through a codebook's
    # aggregation mapping, with labels cached in the chosen language and a
    # fallback value for unmapped entries.
    for field_name in AGGREGATABLE_FIELDS:
        codebook = self.options.get("aggregation_{field_name}".format(field_name=field_name))
        language = self.options.get("aggregation_{field_name}_language".format(field_name=field_name))
        not_found = self.options.get("aggregation_{field_name}_default".format(field_name=field_name))
        if not codebook:
            continue
        codebook.cache_labels(language)
        table.addColumn(MappingMetaColumn(
            _MetaField("article", field_name, field_name + " aggregation"),
            codebook.get_aggregation_mapping(language), not_found
        ))

    # Build columns based on form schemafields: for each included field,
    # gather its "<prefix>_*" options (prefix stripped) and let the
    # serialiser decide which export columns to emit.
    for schemafield in self.bound_form.schemafields:
        prefix = _get_field_prefix(schemafield)
        if self.options[prefix + "_included"]:
            options = {k[len(prefix) + 1:]: v for (k, v) in self.options.iteritems() if k.startswith(prefix)}
            for label, function in schemafield.serialiser.get_export_columns(**options):
                table.addColumn(CodingColumn(schemafield, label, function))
    return table
def values_table(self, unit_codings=False):
    """
    Return the coded values in this job as a table3.Table with codings as
    rows and the schema fields in the columns; cells contain serialised
    values.
    """
    # Unit (sentence) codings use the unit schema; article codings use the
    # article schema.
    if unit_codings:
        schema_id = self.unitschema_id
    else:
        schema_id = self.articleschema_id
    columns = [SchemaFieldColumn(f)
               for f in CodingSchemaField.objects.filter(codingschema=schema_id)]
    # Article codings have no sentence attached; unit codings do.
    codings = (Coding.objects
               .filter(codingjob=self, sentence__isnull=(not unit_codings))
               .prefetch_related("values", "values__field"))
    return table3.ObjectTable(rows=list(codings), columns=columns)
def tree_table(codebook):
    """
    Return a table3.ObjectTable with one row per code in the codebook tree:
    uuid and code_id columns plus one TreeCodeColumn per indentation level.
    """
    rows = list(_get_tree(codebook))
    result = table3.ObjectTable(rows=rows)
    result.addColumn(lambda row: row.code.uuid, label="uuid")
    result.addColumn(lambda row: row.code.id, label="code_id")
    # max() raises ValueError on an empty sequence, so guard: an empty
    # codebook gets zero tree columns instead of crashing.
    depth = (max(row.indent for row in rows) + 1) if rows else 0
    for i in range(depth):
        result.addColumn(TreeCodeColumn(i))
    return result
def tree_table(self, codebook, language, labelcols):
    """
    Return a table3.ObjectTable with one row per code in the codebook tree:
    uuid and code_id columns, label columns (via add_label_columns), and one
    TreeCodeColumn per indentation level rendered in the given language.
    """
    # NOTE(review): labelcols is not used directly here; presumably
    # add_label_columns picks up the same information from self — confirm.
    rows = list(_get_tree(codebook))
    result = table3.ObjectTable(rows=rows)
    result.addColumn(lambda row: row.code.uuid, label="uuid")
    result.addColumn(lambda row: row.code.id, label="code_id")
    self.add_label_columns(result)
    # max() raises ValueError on an empty sequence, so guard: an empty
    # codebook gets zero tree columns instead of crashing.
    depth = (max(row.indent for row in rows) + 1) if rows else 0
    for i in range(depth):
        result.addColumn(TreeCodeColumn(i, language))
    return result
def dict_to_columns(table, rowheader_label="group", rowheader_type=str, cell_type=int):
    """
    Re-shape a table3 table: rows of the input become rows of the result,
    with a leading row-header column plus one typed column per input column.
    """
    result = table3.ObjectTable(rows=table.getRows())
    # Leading column: the row object itself as the row header.
    header = table3.ObjectColumn(label=rowheader_label,
                                 cellfunc=lambda row: row,
                                 fieldtype=rowheader_type)
    result.addColumn(header)
    # One value column per input column; cells delegate to table.getValue.
    for col in table.getColumns():
        result.addColumn(table3.ObjectColumn(
            label=unicode(col),
            cellfunc=partial(table.getValue, column=col),
            fieldtype=cell_type))
    return result
def get_nukes(sentence, transformer, statements_without_object):
    """Return a sequence of statements extracted from the roles"""
    # Parse a node position string: empty/whitespace means "no node" (None).
    read_node = lambda n : int(n) if n.strip() else None
    # Query the triple store for role triples (su/obj/quote/eqv/om).
    # The object position is required, the subject position optional;
    # the AMCAT namespace prefix is stripped from the predicate.
    roles = [(read_node(s), p.replace(AMCAT, ""), read_node(o))
             for (s,p,o) in transformer.query(
                 select=["?spos", "?p", "?opos"],
                 where="""?s ?p [:position ?opos] OPTIONAL {?s :position ?spos} FILTER (?p IN (:su, :obj, :quote, :eqv, :om))""")]
    # Pipeline: extract statements -> fill them out with role info ->
    # resolve equivalences -> attach frames.
    statements = list(get_statements(sentence, roles, statements_without_object))
    statements = [fill_out_statement(sentence, statement, roles) for statement in statements]
    statements = list(resolve_equivalence(statements))
    statements = [add_frames(s) for s in statements]
    # One row per statement; a lemma column per grammatical position.
    nuketable = table3.ObjectTable(rows = statements)
    nuketable.addColumn(lambda s : "/".join(s.type), "type")
    for col in "source", "subject", "predicate", "condition", "object":
        nuketable.addColumn(partial(Statement.get_lemmata, position=col), col)
    nuketable.addColumn(lambda s : s.frames, "frames")
    return nuketable
def get_response(self):
    """
    Render the requested network as an HTML page: the graph object, a table
    of its edges, and the graphviz dot source.
    """
    network = self.read_network(self.options['network'])
    graph = self.get_graph(network)
    html = graph.getHTMLObject()
    dot = graph.getDot()

    def _fmt(value, pattern="%1.1f"):
        # None renders as the empty string; numbers via the given pattern.
        return "" if value is None else pattern % value

    # Flatten the per-key edge lists into a single edge table.
    edges = list(itertools.chain(*graph.edges.values()))
    edge_table = table3.ObjectTable(rows=edges)
    edge_table.addColumn(lambda e: e.subj.id, "subject")
    edge_table.addColumn(lambda e: e.obj.id, "object")
    edge_table.addColumn(lambda e: _fmt(e.weight), "weight")
    edge_table.addColumn(lambda e: _fmt(e.sign, pattern="%+1.2f"), "quality")
    edge_table.addColumn(lambda e: e.pred or "", "predicate")
    edge_table.addColumn(lambda e: e.graph, "subgraph")

    html += tableoutput.table2html(edge_table, printRowNames=False)
    html += "<pre>{dot}</pre>".format(**locals())
    return HttpResponse(html, status=200, mimetype="text/html")
def fast_tablize(self, data):
    """
    Convert a list of (possibly nested) dicts into a table3.ObjectTable.

    Nested dicts become dotted column names ("a.b"). A column's fieldtype
    is the single observed value type, or None when rows disagree; bool and
    NoneType are rendered as str.

    Raises ValueError when data is not a list, or when any value is a list
    (nested lists are not supported by this fast path).
    """
    if not isinstance(data, list):
        raise ValueError("fast_tablize needs a list of (nested) dicts!")

    def _get_keys(item):
        # Yield (key_path_tuple, value_type) for every leaf value in item.
        # (Dropped the unused 'prefix=()' parameter of the original.)
        for key, val in item.items():
            if isinstance(val, list):
                raise ValueError("fast_tablize needs a list of (nested) dicts (not nested lists)!")
            if isinstance(val, dict):
                for nested_key, _type in _get_keys(val):
                    yield (key,) + nested_key, _type
            else:
                yield (key,), type(val)

    def _get_val(d, key):
        # Walk the key path; a missing intermediate key yields None.
        val = d.get(key[0])
        if val is None or len(key) == 1:
            return val
        return _get_val(val, key[1:])

    # keys maps dotted column name -> (key path, set of observed types),
    # preserving first-seen order.
    keys = OrderedDict()
    for row in data:
        for key, _type in _get_keys(row):
            name = ".".join(key)
            if name in keys:
                keys[name][1].add(_type)
            else:
                keys[name] = (key, {_type})

    table = table3.ObjectTable(rows=data)
    for col, (key, types) in keys.items():
        fieldtype = list(types)[0] if len(types) == 1 else None
        fieldtype = {bool: str, type(None): str}.get(fieldtype, fieldtype)
        table.add_column(label=col, col=partial(_get_val, key=key), fieldtype=fieldtype)
    return table
# --- Script setup: pick output directory / www root from argv or defaults ---
if len(sys.argv) > 1:
    outdir = sys.argv[1]
    wwwroot = 'file://{outdir}'.format(**locals())
else:
    outdir = OUTDIR_DEFAULT
    wwwroot = WWWROOT_DEFAULT
log = amcatlogging.setup()
if not os.path.exists(outdir):
    os.makedirs(outdir)  # make sure target exists
script = sys.argv[0]
# NOTE(review): 'stamp' is not defined in this chunk — presumably set
# earlier in the file; verify before relying on this log line.
log.info("Starting documentation by {script} at {stamp}".format(**locals()))
doc = table3.ObjectTable()
test = table3.ObjectTable()
for reponame in REPONAMES:
    repolocation = REPOLOC.format(**locals())
    # clone the repository into a fresh temporary directory
    tmpdir = tempfile.mkdtemp()
    repodir = '{tmpdir}/{reponame}'.format(**locals())
    log.info(
        "{reponame}: Cloning {repolocation} to {repodir}".format(**locals()))
    repo = hg.clone(repolocation, repodir)
    # one documentation row per branch of the repository
    for branch in repo.listbranches():
        row = dict(repo=reponame, branch=branch)
        doc.rows.append(row)
def index(request):
    """
    Overview page for the semantic-roles gold standard: for every gold
    sentence, apply the current rule set, score it against the gold
    annotations (tp/fp/fn, precision, recall, F1), and render the results
    as an HTML table.
    """
    # build table with gold standard sentences
    ruleset = request.GET.get('ruleset', '').lower()
    if ruleset:
        ruleset = "_" + ruleset
    goldfile = GOLDFILE.format(**locals())
    grammarfile = GRAMMARFILE.format(**locals())
    g, gold_relations = get_gold(goldfile)
    comments = get_gold_comments(goldfile)

    # if rules are modified, store current values (scores become the new
    # "previous" baseline for the diff column)
    grammar_modified = os.path.getmtime(grammarfile)
    store_score = request.session.get('grammartime', None) != grammar_modified
    request.session['grammartime'] = grammar_modified

    sentences = AnalysisSentence.objects.filter(pk__in=g.keys())
    metrics = {}  # (sid, "tp"/"fn"/"fp") : score
    tt = get_tt(ruleset, gold_relations)
    for sentence in sentences:
        tt.load_sentence(sentence.id)
        tt.apply_lexical()
        tt.apply_rules()
        found = set(tt.get_roles())
        print "--->", found
        gold = g[sentence.id]
        gold = set(do_gold_reality(found, gold))
        tp = len(gold & found)
        fp = len(found - gold)
        fn = len(gold - found)
        # Precision/recall are None when their denominator is zero.
        pr = tp / float(tp + fp) if (tp + fp) else None
        re = tp / float(tp + fn) if (tp + fn) else None
        # NOTE(review): when pr or re is None the other is necessarily 0 or
        # None here (tp must be 0), so the arithmetic branch is never
        # entered with a None operand — confirm if gold/found semantics change.
        f = 2 * pr * re / (pr + re) if (pr or re) else 0
        if tp + fp + fn == 0:
            f = None
        for metric in "tp fp fn pr re f".split():
            metrics[sentence.id, metric] = locals()[metric]
        # Compare against the score stored for the previous grammar version.
        key = "semanticroles_fscore_%i" % sentence.id
        previous = request.session.get(key, None)
        metrics[sentence.id, "prev"] = "" if previous is None else previous
        metrics[sentence.id, "diff"] = "" if previous is None else colorize((f or 0) - previous)
        if store_score:
            request.session[key] = f

    # Build the per-sentence result table.
    sentencetable = table3.ObjectTable(rows=sentences)
    sentencetable.addColumn(lambda s : "<a href='{url}?ruleset={ruleset}'>{s.id}</a>".format(url=reverse('semanticroles-sentence', args=[s.id]), ruleset=ruleset[1:], s=s), "ID")
    sentencetable.addColumn(lambda s : unicode(s.sentence.sentence)[:60], "Sentence")
    sentencetable.addColumn(lambda s : "<br/>".join(comments.get(s.id, [])), "Remarks")

    def get_metric(metric, sentence):
        # Render a metric cell: None -> "", floats with two decimals.
        result = metrics[sentence.id, metric]
        if result is None:
            result = ""
        if isinstance(result, float):
            result = "%1.2f" % result
        return result
    for metric in ("tp","fp","fn", "f", "prev", "diff"):
        sentencetable.addColumn(partial(get_metric, metric), metric)
    sentencetablehtml = tableoutput.table2htmlDjango(sentencetable, safe=True)
    print grammar_modified, store_score
    return render(request, "navigator/semanticroles/index.html", locals())
writer = pspp.next() writer = EchoWriter(writer) log.debug("Creating SPS script and sending to PSPP") table2spss(t, writer=writer, saveas=filename) log.debug("Closing PSPP") out, err = pspp.next() log.debug("PSPPP err: %s" % err) log.debug("PSPPP out: %s" % out) err = err.replace('pspp: error creating "pspp.jnl": Permission denied', '') err = err.replace( 'pspp: ascii: opening output file "pspp.list": Permission denied', '') if err.strip(): raise Exception(err) if "error:" in out.lower(): raise Exception("PSPP Exited with error: \n\n%s" % out) if not os.path.exists(filename): raise Exception( "PSPP Exited without errors, but file was not saved.\n\nOut=%r\n\nErr=%r" % (out, err)) return filename if __name__ == '__main__': db = dbtoolkit.amcatDB() cj = codingjob.CodingJob(db, 4534) t = table3.ObjectTable(rows=codingjob.getCodedSentencesFromCodingjobs([cj ]), columns=map(SPSSFieldColumn, cj.unitSchema.fields)) print table2sav(t)