def main():
    """Propositionalize the input network with HINMine and save the
    resulting feature table as a tabular-data-resource JSON document.
    """
    # configure logging
    logging.basicConfig(level=logging.INFO)
    logging.info(cf_netSDM)  # log the backing module for traceability

    # Read inputs
    inputs = io_helper.fetch_data()
    data = inputs['data']
    # NOTE(review): if get_param applies bool() to a raw string default,
    # both 'True' and 'False' are truthy — confirm get_param parses
    # boolean strings rather than calling bool() on them.
    normalize = get_param(inputs['parameters'], 'normalize', bool, 'True')
    damping = get_param(inputs['parameters'], 'damping', float, '0.85')

    # Build the (n_samples, n_variables) matrix in one shot; this replaces
    # the previous manual row/column counter loops.
    data_array = np.array(
        [var['series'] for var in data['independent']], dtype=float
    ).T

    if normalize:
        # Scale every column to unit Euclidean norm. Zero columns are left
        # untouched instead of producing NaNs (division by a zero norm).
        norms = np.linalg.norm(data_array, axis=0)
        norms[norms == 0] = 1.0
        data_array = data_array / norms

    network = construct_adjacency_graph(range(data_array.shape[0]),
                                        data_array,
                                        data['dependent'][0]['series'])
    propositionalized = cf_netSDM.hinmine_propositionalize(network, damping)['train_features']['data']

    results_dict = {
        'profile': 'tabular-data-resource',
        'name': 'hinmine-features',
        'data': [],
        'schema': {
            'fields': [],
            'primaryKey': 'id'
        }
    }
    # The propositionalized matrix is square: one feature per instance.
    n = propositionalized.shape[0]
    for row_index in range(n):
        instance = {"id": row_index}
        for col_index in range(n):
            instance["feature_%i" % (col_index + 1)] = propositionalized[row_index, col_index]
        results_dict['data'].append(instance)
    for col_index in range(n):
        results_dict['schema']['fields'].append({'name': 'feature_%i' % (col_index + 1),
                                                 'type': 'float'})
    io_helper.save_results(json.dumps(results_dict), '', 'text/plain')
def main():
    """Run a linear regression on the fetched data and store the result."""
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    dep_var = inputs["data"]["dependent"][0]
    independent_vars = inputs["data"]["independent"]

    # The dependent variable must be continuous; bail out otherwise.
    if dep_var["type"]["name"] not in ("integer", "real"):
        logging.warning("Dependent variable should be continuous !")
        return None

    # Extract data and parameters from inputs
    data = format_data(inputs["data"])

    # Compute linear-regression and generate PFA output
    raw_results = compute_linear_regression(dep_var, independent_vars, data)
    linear_regression_results = format_output(raw_results)

    # Store results
    io_helper.save_results(linear_regression_results, '', 'application/highcharts+json')
def main():
    """Run an ANOVA on the fetched data and store the result."""
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    dep_var = inputs["data"]["dependent"][0]
    independent_vars = inputs["data"]["independent"]
    design = get_parameter(inputs["parameters"], DESIGN_PARAM)

    # The dependent variable must be continuous; bail out otherwise.
    if dep_var["type"]["name"] not in ("integer", "real"):
        logging.warning("Dependent variable should be continuous !")
        return None

    # Extract data and parameters from inputs
    data = format_data(inputs["data"])

    # Compute anova and generate PFA output
    raw_results = compute_anova(dep_var, independent_vars, data, design)
    anova_results = format_output(raw_results.to_dict())

    # Store results
    io_helper.save_results(anova_results, '', 'application/highcharts+json')
def main():
    """Compute per-variable histograms and store them as HighCharts JSON."""
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs; either variable group may be absent from the payload.
    inputs = io_helper.fetch_data()
    try:
        dependent = inputs["data"]["dependent"][0]
    except KeyError:
        logging.warning("Cannot find dependent variables data")
        dependent = []
    try:
        independent = inputs["data"]["independent"]
    except KeyError:
        logging.warning("Cannot find independent variables data")
        independent = []

    nb_bins = get_bins_param(inputs["parameters"], BINS_PARAM)

    # Compute histograms (JSON formatted for HighCharts) and store them.
    histograms_results = compute_histograms(dependent, independent, nb_bins)
    io_helper.save_results(histograms_results, '', 'application/highcharts+json')
import tempfile
import logging
from subprocess import call

from io_helper import io_helper
import preprocess

# Docker image this algorithm is expected to run in.
DEFAULT_DOCKER_IMAGE = 'python-hedwig'

if __name__ == '__main__':
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    data = inputs["data"]
    out_file = 'input.csv'
    rules_out_file = 'rules.txt'

    # Convert the fetched data into a CSV file that hedwig can consume.
    matrix, attributes = preprocess.to_matrix(data)
    preprocess.dump_to_csv(matrix, attributes, out_file)

    # Call hedwig with sensible defaults
    examples_file = out_file
    # hedwig expects a background-knowledge directory; an empty temporary
    # directory means "no background knowledge".
    empty_bk = tempfile.mkdtemp()
    call([
        'python', '-m', 'hedwig',
        empty_bk,
        examples_file,
        '-f', 'csv',
        '-l',
        '-o', rules_out_file,
        '--nocache'
        # NOTE(review): the argument list and the rest of this script
        # continue beyond the visible chunk — the call is not closed here.
def main():
    """Embed the independent variables with t-SNE and store a HighCharts
    scatter plot of the result.

    Fixes over the previous version:
    - ``sys.exec_info()`` (an AttributeError typo) is now ``sys.exc_info()``.
    - both ``logging.error`` calls lacked a ``%s`` placeholder, so their
      extra argument was silently dropped; placeholders added.
    - the builtin ``input`` is no longer shadowed.
    - string-to-bool parsing is factored into ``_parse_bool``.
    """
    logging.basicConfig(level=logging.INFO)
    inputs = io_helper.fetch_data()

    # Dependent variable: for t-SNE this may provide point labels (optional).
    labels = None
    dependent = inputs["data"].get("dependent", [])
    indep_vars = inputs["data"]["independent"]

    # t-SNE requires continuous data dimensions.
    if not data_types_in_allowed(indep_vars, ["integer", "real"]):
        logging.warning("Independent variables should be continuous !")
        return None

    data = format_independent_data(inputs["data"])
    df = pd.DataFrame.from_dict(data)
    source_dimensions = df.shape[1]  # number of columns
    num_points = df.shape[0]         # number of samples/points
    convdf = df.apply(lambda x: pd.to_numeric(x))

    # Write the data to a temporary file
    f = tempfile.NamedTemporaryFile(delete=False)
    points = convdf.values.astype(np.float32)
    logging.debug('input {}'.format(points))

    # Get the parameters (optional), falling back to these defaults.
    perplexity = 30
    theta = 0.5
    target_dimensions = 2
    iterations = 1000
    do_zscore = True
    dependent_is_label = True
    try:
        perplexity = get_parameter(inputs['parameters'], 'perplexity', perplexity)
        theta = get_parameter(inputs['parameters'], 'theta', theta)
        target_dimensions = get_parameter(inputs['parameters'], 'target_dimensions', target_dimensions)
        iterations = get_parameter(inputs['parameters'], 'iterations', iterations)
        do_zscore = _parse_bool(
            get_parameter(inputs['parameters'], 'do_zscore', str(do_zscore)))
        dependent_is_label = _parse_bool(
            get_parameter(inputs['parameters'], 'dependent_is_label', str(dependent_is_label)))
    except ValueError as e:
        # Fixed: the original call had no placeholder, losing the exception.
        logging.error("Could not convert supplied parameter to value, error: %s", e)
        raise
    except Exception:
        # Fixed: sys.exec_info() does not exist — it is sys.exc_info().
        logging.error("Unexpected error: %s", sys.exc_info()[0])
        raise

    # Compute results
    if do_zscore:
        points = scipy.stats.zscore(points)
    if len(dependent) > 0 and dependent_is_label:
        dep_var = dependent[0]
        labels = dep_var["series"]

    # Dump the float32 matrix for the external t-SNE binary, then reserve a
    # second temporary file for its output.
    input_file_path = f.name
    points.tofile(input_file_path)
    f.close()
    f = tempfile.NamedTemporaryFile(delete=False)
    output_file_path = f.name
    f.close()

    output = a_tsne(input_file_path, output_file_path, num_points,
                    source_dimensions, target_dimensions,
                    perplexity, theta, iterations)
    logging.debug('output shape {}'.format(output.shape))
    logging.debug('output {}'.format(output))

    chart = generate_scatterchart(output, indep_vars, labels,
                                  perplexity, theta, iterations)

    error = ''
    shape = 'application/highcharts+json'
    logging.debug("Highchart: %s", chart)
    io_helper.save_results(chart, error, shape)
    logging.info("Highchart output saved to database.")


def _parse_bool(text):
    """Strictly parse the strings 'True'/'False'; raise ValueError otherwise."""
    if text == 'True':
        return True
    if text == 'False':
        return False
    raise ValueError("expected 'True' or 'False', got %r" % (text,))