def load_neg_pos_specs() -> Dict[str, PosNegExample]:
    """Load positive/negative example pairs from the known data files.

    Returns a dict keyed by '<source>-<index>' (index restarts per file),
    mapping to the corresponding PosNegExample.
    """
    examples: Dict[str, PosNegExample] = {}
    for path in [man_data_path, yh_data_path, ba_data_path, label_data_path]:
        with open(path) as f:
            content = json.load(f)
        # every row in one file shares the file-level source name
        source = content['source']
        for index, row in enumerate(content['data']):
            schema = Data(
                [Field.from_obj(obj) for obj in row['fields']],
                row.get('num_rows'),
            )
            key = f'{source}-{index}'
            examples[key] = PosNegExample(
                key,
                schema,
                row.get('task'),
                source,
                row['negative'],
                row['positive'],
            )
    return examples
def load_neg_pos_specs() -> Dict[str, PosNegExample]:
    """Collect PosNegExample records from every example data file.

    Keys have the form "<source>-<i>" where i counts rows within one file.

    NOTE(review): an identical definition appears nearby in this view —
    confirm that both copies are intended to exist.
    """
    paths = (man_data_path, yh_data_path, ba_data_path, label_data_path)
    result: Dict[str, PosNegExample] = {}
    for path in paths:
        with open(path) as handle:
            payload = json.load(handle)
        counter = 0
        for row in payload["data"]:
            src = payload["source"]
            fields = [Field.from_obj(obj) for obj in row["fields"]]
            key = f"{src}-{counter}"
            result[key] = PosNegExample(
                key,
                Data(fields, row.get("num_rows")),
                row.get("task"),
                src,
                row["negative"],
                row["positive"],
            )
            counter += 1
    return result
def acquire_data(url):
    """Return the Data object for `url`, loading and caching it on first use.

    The JSON file is resolved locally by joining its basename onto
    `data_dir`; afterwards the object's url is rewritten back to the
    short name, since Data.from_json records the full path on the
    current machine.
    """
    if url in data_cache:
        return data_cache[url]
    local_path = os.path.join(data_dir, os.path.basename(url))
    loaded = Data.from_json(local_path)
    # keep the short url, not the machine-local path set by from_json
    loaded.url = url
    data_cache[url] = loaded
    return loaded
def __init__(self, distributions: Dict, type_distribution: Dict,
             definitions: Dict, data_schema: Dict, data_url: str) -> None:
    """Build the generation Model and Data description from raw config dicts.

    data_schema entries must provide 'name', 'type' and 'cardinality';
    definitions must provide 'topLevelProps' and 'encodingProps'.
    """
    fields = []
    for entry in data_schema:
        fields.append(
            Field(entry['name'], entry['type'], cardinality=entry['cardinality'])
        )
    self.model = Model(
        fields,
        distributions,
        type_distribution,
        definitions['topLevelProps'],
        definitions['encodingProps'],
    )
    self.data = Data(fields)
    self.data_url = data_url
def test_is_valid():
    """is_valid rejects a text-mark spec and accepts a point-mark spec,
    each encoding one quantitative field on x."""
    data = Data(fields=[Field('foo', 'number')])

    invalid = Query.from_vegalite({
        'mark': 'text',
        'encoding': {
            'x': {'field': 'foo', 'type': 'quantitative'}
        }
    })
    # PEP 8 (E712): assert truthiness directly instead of `== False`
    assert not is_valid(Task(data, invalid))

    valid = Query.from_vegalite({
        'mark': 'point',
        'encoding': {
            'x': {'field': 'foo', 'type': 'quantitative'}
        }
    })
    assert is_valid(Task(data, valid))
def test_count_violations():
    """count_violations reports exactly 2 'encoding' violations for this
    bar chart over the cars dataset."""
    schema = Data.from_csv('examples/data/cars.csv')
    spec = {
        'mark': 'bar',
        'data': {'url': 'data/cars.csv'},
        'encoding': {
            'x': {'field': 'origin', 'type': 'ordinal'},
            'y': {
                'field': 'horsepower',
                'type': 'quantitative',
                'aggregate': 'mean',
            },
        },
    }
    task = Task(schema, Query.from_vegalite(spec))
    violations = count_violations(task)
    assert 'encoding' in violations
    assert violations['encoding'] == 2
def __init__(
    self,
    distributions: Dict,
    type_distribution: Dict,
    definitions: Dict,
    data_schema: Dict,
    data_url: str,
) -> None:
    """Set up the generation Model, the Data schema and the data url.

    NOTE(review): an identical __init__ appears earlier in this view —
    confirm whether both copies are intended.
    """
    field_list = [
        Field(spec["name"], spec["type"], cardinality=spec["cardinality"])
        for spec in data_schema
    ]
    self.model = Model(
        field_list,
        distributions,
        type_distribution,
        definitions["topLevelProps"],
        definitions["encodingProps"],
    )
    self.data = Data(field_list)
    self.data_url = data_url
def get_rec(data, query):
    """Run the recommender on a partial query given as a plain object."""
    return run(Task(data, Query.from_obj(query)))


def run_spec(data, spec):
    """Run the recommender on a full Vega-Lite spec."""
    return run(Task(data, Query.from_vegalite(spec)))


# Schema shared by the tests below: 100 rows, four fields of varying
# cardinality.
spec_schema = Data([
    Field('q1', 'number', 100, 1),
    Field('q2', 'number', 100, 1),
    Field('o1', 'number', 6, 1),
    Field('n1', 'string', 5, 1),
], 100, url='data.csv')


class TestSpecs():
    def test_scatter(self):
        """Complete a partial query that pins q1 to x and leaves q2 open."""
        partial = {
            'encoding': [
                {'channel': 'x', 'field': 'q1'},
                {'field': 'q2'},
            ]
        }
        # NOTE(review): the result is unused here — assertions on it may
        # live in a part of the file not shown in this view.
        recommendation = get_rec(spec_schema, partial).to_vegalite()
from draco.spec import Data, Field, Query, Task
from draco.generation.helper import is_valid

# Schema shared by the tests in this module: two nominal and three
# quantitative fields.
data = Data(fields=[
    Field('n1', 'string'),
    Field('n2', 'string'),
    Field('q1', 'number'),
    Field('q2', 'number'),
    Field('q3', 'number')
])


class TestValidSpecs():
    def test_hist(self):
        """A histogram (binned x, count on y) should be a valid spec."""
        query = Query.from_vegalite({
            'mark': 'bar',
            'encoding': {
                'x': {
                    'type': 'quantitative',
                    'field': 'q1',
                    'bin': True
                },
                'y': {
                    'type': 'quantitative',
                    'aggregate': 'count'
                }
            }
        })
        # PEP 8 (E712): assert truthiness directly instead of `== True`
        assert is_valid(Task(data, query), True)
def test_generate(self):
    """fill_with_random_content populates a Data object's content,
    which starts out as None."""
    data = Data(fields=[Field('foo', 'number', 10, 2.1, [1, 2])])
    # PEP 8 (E711): compare to None with `is` / `is not`, never ==/!=
    assert data.content is None
    data.fill_with_random_content()
    assert data.content is not None
def test_to_asp(self):
    """Data.to_asp emits a num_rows fact plus fieldtype, cardinality and
    entropy facts for each field."""
    schema = Data([Field('foo', 'number', 10, 0.4)], 42)
    expected = (
        'num_rows(42).\n'
        '\n'
        'fieldtype(foo,number).\n'
        'cardinality(foo,10).\n'
        # entropy 0.4 serializes as the integer 4 — presumably scaled;
        # confirm against Data.to_asp
        'entropy(foo,4).\n'
    )
    assert schema.to_asp() == expected