示例#1
0
def pair_partition_to_vec(input_data: Tuple[Dict, Tuple[str,str], Iterable[Union[PosNegExample, UnlabeledExample, np.ndarray]]]):
    """Convert one partition of example pairs into a multi-index feature frame.

    Args:
        input_data: A 3-tuple ``(processed_specs, fields, partition_data)``.
            ``processed_specs`` is forwarded unchanged to
            ``count_violations_memoized``. ``fields`` holds the two top-level
            column labels: ``fields[0]`` labels the features of the spec at
            position 4 of each example and ``fields[1]`` those of the spec at
            position 5. ``partition_data`` is the iterable of examples;
            ``np.ndarray`` items are unpacked into ``PosNegExample``.

    Returns:
        A ``pandas.DataFrame`` with one row per example, indexed by the
        example's ``pair_id``, whose columns come from
        ``get_nested_index(fields)``. An empty partition yields an empty
        frame with those columns instead of raising from ``pd.concat``.
    """
    processed_specs, fields, partition_data = input_data

    columns = get_nested_index(fields)
    dfs = []

    for example in partition_data:
        # Reset the class-level counter before every example.
        # NOTE(review): presumably keeps generated encoding ids identical
        # across examples so memoization can hit - confirm.
        Encoding.encoding_cnt = 0

        # hack to get named tuples to work in parallel
        if isinstance(example, np.ndarray):
            example = PosNegExample(*example)

        # use positional indexes because we don't know the field names here
        neg_feature_vec = count_violations_memoized(
            processed_specs,
            Task(example.data, Query.from_vegalite(example[4]), example.task))
        pos_feature_vec = count_violations_memoized(
            processed_specs,
            Task(example.data, Query.from_vegalite(example[5]), example.task))

        # Reformat the json data so that we can insert it into a multi index data frame.
        # https://stackoverflow.com/questions/24988131/nested-dictionary-to-multiindex-dataframe-where-dictionary-keys-are-column-label
        specs = {(fields[0], key): values for key, values in neg_feature_vec.items()}
        specs.update({(fields[1], key): values for key, values in pos_feature_vec.items()})

        specs[('source', '')] = example.source
        specs[('task', '')] = example.task

        dfs.append(pd.DataFrame(specs, columns=columns, index=[example.pair_id]))

    if not dfs:
        # pd.concat raises ValueError when given no frames; an empty partition
        # should contribute zero rows rather than abort the whole job.
        return pd.DataFrame(columns=columns)

    return pd.concat(dfs)
示例#2
0
    def __mutate_spec(self, base_spec: Spec, props: List[str], prop_index: int,
                      seen: Set[Spec], specs: List[Spec]):
        """Recursively enumerate mutations of ``base_spec`` over ``props``.

        For each property from ``props[prop_index]`` onwards, every value
        returned by ``self.model.get_enums`` is applied to a deep copy of the
        current spec. Once all properties have been processed, the spec is
        post-processed, pointed at ``self.data_url``, de-duplicated against
        ``seen`` and, if ``is_valid`` accepts it, appended to ``specs``.

        Both ``seen`` and ``specs`` are mutated in place; nothing is returned.
        """
        if prop_index != len(props):
            # recursive case: branch on every enum of the current property
            current_prop = props[prop_index]
            next_index = prop_index + 1
            for enum in self.model.get_enums(current_prop):
                candidate = deepcopy(base_spec)
                self.model.mutate_prop(candidate, current_prop, enum)
                self.__mutate_spec(candidate, props, next_index, seen, specs)
            return

        # base case: every property has been assigned
        self.model.post_improve(base_spec, props)
        base_spec['data'] = {'url': self.data_url}

        # within a group, don't repeat the same specs
        if base_spec in seen:
            return
        seen.add(base_spec)

        query = Query.from_vegalite(base_spec)
        task = Task(self.data, query)
        if is_valid(task):
            specs.append(base_spec)
示例#3
0
    def test_stack_agg(self):
        """A zero-stacked, summed bar with detail and mean color must be valid."""
        x_enc = {'type': 'nominal', 'field': 'n1'}
        y_enc = {
            'type': 'quantitative',
            'field': 'q1',
            'stack': 'zero',
            'aggregate': 'sum',
        }
        detail_enc = {'type': 'nominal', 'field': 'n2'}
        color_enc = {
            'type': 'quantitative',
            'field': 'q2',
            'aggregate': 'mean',
        }
        spec = {
            'mark': 'bar',
            'encoding': {
                'x': x_enc,
                'y': y_enc,
                'detail': detail_enc,
                'color': color_enc,
            },
        }

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#4
0
def test_is_valid():
    """Check ``is_valid`` on a single-field dataset with one x encoding.

    The same quantitative x encoding is expected to be rejected with a
    ``text`` mark and accepted with a ``point`` mark.
    """
    data = Data(fields=[Field('foo', 'number')])

    def build_query(mark):
        # A fresh spec dict per call so the two queries share no state.
        return Query.from_vegalite({
            'mark': mark,
            'encoding': {
                'x': {'field': 'foo', 'type': 'quantitative'},
            },
        })

    invalid = build_query('text')
    assert is_valid(Task(data, invalid)) == False

    valid = build_query('point')
    assert is_valid(Task(data, valid)) == True
示例#5
0
    def test_one_bar(self):
        """A bar mark with only a quantitative y encoding must be valid."""
        y_enc = {'type': 'quantitative', 'field': 'q1'}
        spec = {'mark': 'bar', 'encoding': {'y': y_enc}}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#6
0
    def test_row_only(self):
        """A point mark whose only encoding is a nominal row must be invalid."""
        row_enc = {'type': 'nominal', 'field': 'n1'}
        spec = {'mark': 'point', 'encoding': {'row': row_enc}}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == False
示例#7
0
    def test_only_one_agg(self):
        """A scatter with a raw x and an aggregated (mean) y must be invalid."""
        x_enc = {'type': 'quantitative', 'field': 'q1'}
        y_enc = {
            'type': 'quantitative',
            'field': 'q2',
            'aggregate': 'mean',
        }
        spec = {'mark': 'point', 'encoding': {'x': x_enc, 'y': y_enc}}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == False
示例#8
0
    def test_heatmap(self):
        """A rect mark with nominal x and binned ordinal y must be valid."""
        x_enc = {'type': 'nominal', 'field': 'n1'}
        y_enc = {'type': 'ordinal', 'field': 'q1', 'bin': True}
        spec = {'mark': 'rect', 'encoding': {'x': x_enc, 'y': y_enc}}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#9
0
    def test_hist(self):
        """A bar mark with binned quantitative x and a count on y must be valid."""
        x_enc = {'type': 'quantitative', 'field': 'q1', 'bin': True}
        # The y channel carries no field, only the count aggregate.
        y_enc = {'type': 'quantitative', 'aggregate': 'count'}
        spec = {'mark': 'bar', 'encoding': {'x': x_enc, 'y': y_enc}}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#10
0
    def test_no_auto_bin(self):
        """Parsing a field-less bar spec must not set ``stack`` on any encoding.

        NOTE(review): the test name mentions "bin" but the assertion inspects
        ``stack`` - confirm which attribute is intended.
        """
        spec = {
            'mark': 'bar',
            'encoding': {
                'x': {'type': 'nominal'},
                "y": {'type': 'quantitative', 'aggregate': 'mean'},
                'color': {'type': 'nominal'},
            },
        }
        q = Query.from_vegalite(spec)

        # Collect every encoding whose ``stack`` attribute is truthy.
        stacked = []
        for enc in q.encodings:
            if enc.stack:
                stacked.append(enc)

        assert len(stacked) == 0
示例#11
0
def test_count_violations():
    """``count_violations`` must report exactly two 'encoding' entries for a mean-horsepower bar chart."""
    data = Data.from_csv('examples/data/cars.csv')

    x_enc = {'field': 'origin', 'type': 'ordinal'}
    y_enc = {
        'field': 'horsepower',
        'type': 'quantitative',
        'aggregate': 'mean',
    }
    query_json = {
        'mark': 'bar',
        'data': {'url': 'data/cars.csv'},
        'encoding': {'x': x_enc, 'y': y_enc},
    }

    query = Query.from_vegalite(query_json)
    violations = count_violations(Task(data, query))

    assert 'encoding' in violations.keys()
    assert violations.get('encoding') == 2
示例#12
0
    def test_scatter(self):
        """A point mark using x, y, nominal color and quantitative size must be valid."""
        encoding = {
            'x': {'type': 'quantitative', 'field': 'q1'},
            'y': {'type': 'quantitative', 'field': 'q2'},
            'color': {'type': 'nominal', 'field': 'n2'},
            'size': {'type': 'quantitative', 'field': 'q3'},
        }
        spec = {'mark': 'point', 'encoding': encoding}

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#13
0
    def test_stack_q_q(self):
        """A stacked area over two quantitative axes, colored by a nominal field, must be valid."""
        x_enc = {
            'type': 'quantitative',
            'field': 'q1',
            'scale': {'zero': False},
        }
        y_enc = {
            'type': 'quantitative',
            'field': 'q2',
            'stack': 'zero',
        }
        color_enc = {'type': 'nominal', 'field': 'n1'}
        spec = {
            'mark': 'area',
            'encoding': {'x': x_enc, 'y': y_enc, 'color': color_enc},
        }

        query = Query.from_vegalite(spec)
        task = Task(data, query)
        assert is_valid(task, True) == True
示例#14
0
def run_spec(data, spec):
    """Build a ``Task`` from ``data`` and a Vega-Lite ``spec`` and return ``run``'s result for it."""
    return run(Task(data, Query.from_vegalite(spec)))