def save(self):
    """Serialize this node's attrs/attributes to a JSON file via PARSER.

    External-source objects get their raw source split out into a
    companion file; explicit child ordering is written separately.
    """
    # Nodes that own a folder (or contain actions) are written as the
    # folder's info file; plain nodes get "<Name>.json".
    if self.has_folder or self.is_actions_found:
        name = constants.INFO_FILE
    else:
        name = "{}.json".format(self.attrs["Name"])
    # External-source objects: dump the raw source into a sibling file
    # and record that file's name in attrs instead of inlining it.
    if "Type" in self.attrs \
            and self.attrs['Type'] in constants.EXTERNAL_SOURCE_TYPES \
            and "source" in self.attributes:
        source_name = name + constants.EXTERNAL_SOURCE_TYPES[
            self.attrs['Type']]
        PARSER.write_file(source_name, "".join(self.attributes["source"]))
        self.attrs["source_file_name"] = source_name
        del self.attributes["source"]
    # Normalize every attribute: join buffered fragments, strip noise,
    # encode, then split back into lines for readable JSON output.
    self.attributes = {
        key: encode(clean_data("".join(val))).split('\n')
        for (key, val) in self.attributes.items()
    }
    # OrderedDict keeps "attrs" before "attributes" in the JSON file.
    data = OrderedDict([("attrs", sort_dict(self.attrs)),
                        ("attributes", sort_dict(self.attributes))])
    data = json.dumps(data, indent=4)
    detect_guids(data)
    PARSER.write_file(name, data)
    # Persist explicit child ordering when present.
    if self.childs_order:
        order_data = json.dumps(self.childs_order, indent=4)
        detect_guids(order_data)
        PARSER.write_file(constants.CHILDS_ORDER, order_data)
    # Leave the directory entered for this node (mirrors the push done
    # elsewhere when has_folder/is_actions_found is set -- confirm).
    if self.has_folder or self.is_actions_found:
        PARSER.pop_from_current_path()
def child_end(self, tagname):
    """Handle the closing tag of a child XML element.

    Dispatches on (current_mode, tagname): finalizes an Event into its
    owning page, an Action into self.actions, or a Parameter's buffered
    character data.
    """
    if self.current_mode == "Events" and tagname == "Event":
        # Attach the finished event to its page and register every
        # referenced action id with an empty placeholder value.
        page = PARSER.pages.get(self.current_node["ContainerID"], "")
        if page:
            page["events"].append(sort_dict(self.current_node))
            for action in self.current_node["actions"]:
                if not page["actions"].get(action, ""):
                    page["actions"][action] = ""
        self.current_node = ""
    elif self.current_mode == "Events" and tagname == "Events":
        self.current_mode = ""
    elif self.current_mode == "Actions" and tagname == "Action":
        # Drop an empty Params list before storing the action by ID.
        if not self.current_node["Params"]:
            del self.current_node["Params"]
        self.actions[self.current_node["ID"]] = sort_dict(
            self.current_node)
        self.current_node = ""
    elif self.current_mode == "Actions" and tagname == "Actions":
        self.current_mode = ""
    elif self.current_mode == "Actions" and tagname == "Parameter":
        # Stop accepting character data; collapse the buffered chunks of
        # the last parameter's value into one cleaned string.
        self.is_data_allowed = False
        self.current_node["Params"][-1][1] = \
            clean_data("".join(self.current_node["Params"][-1][1]))
def main(file: str) -> None:
    """Load one dataset file into its Postgres table.

    Looks up the table name, CSV path and DDL in schema.DATA_SCHEMA,
    optionally cleans the 'products' CSV first, then creates the table
    and bulk-loads the CSV (header skipped) via COPY.

    Args:
        file: Key into schema.DATA_SCHEMA identifying the dataset.
    """
    logging.info(f'Loading {file} into Postgres database.')

    # Schema Details
    schema_info = schema.DATA_SCHEMA[file]
    table_name = schema_info['tablename']
    file_name = schema_info['filename']
    query = schema_info['query']

    # Clean data -- only the products file needs preprocessing.
    if file == 'products':
        temp_df = pd.read_csv(file_name)
        df = helpers.clean_data(temp_df)
        df.to_csv(f'tempdata/{file}.csv', index=False)
        file_name = f'tempdata/{file}.csv'

    logging.info(f'Inserting data into {table_name} table.')
    # NOTE(review): the credentials dict is deliberately NOT logged --
    # the previous logging.info(config.CREDENTIALS) leaked DB secrets.
    conn = psycopg2.connect(host=config.CREDENTIALS['host'],
                            database=config.CREDENTIALS['database'],
                            user=config.CREDENTIALS['user'])
    logging.info(conn)
    try:
        # Cursor is closed by its context manager; the connection is
        # closed in finally (psycopg2's connection `with` only commits).
        with conn.cursor() as cur:
            cur.execute(query)
            conn.commit()
            with open(file_name, 'r') as f:
                next(f)  # skip the CSV header row
                cur.copy_from(f, table_name, sep=',')
            conn.commit()
    finally:
        conn.close()
def write_attributes(attributes, indent):
    """Emit an <Attributes> section with one <Attribute> per entry.

    List values are joined with newlines before being cleaned/encoded.
    """
    write_xml("Attributes", indent=indent)
    for attr_name, attr_value in attributes.items():
        # Multi-line attribute bodies arrive as a list of lines.
        text = "\n".join(attr_value) if isinstance(attr_value, list) else attr_value
        write_xml(
            "Attribute",
            attrs={"Name": attr_name},
            data=clean_data(encode(text)),
            indent=indent + 2,
            close=True
        )
    write_xml("Attributes", indent=indent, closing=True)
def write_object(path, name, indent):
    """Emit an <Object> XML element reconstructed from a JSON file.

    For external-source types the object's source code is read back from
    the companion file recorded in attrs["source_file_name"] and
    re-inlined as the "source" attribute before writing.
    """
    with open_file(os.path.join(path, name)) as obj_file:
        obj_json = json_load(obj_file, critical=True)
    if "Type" in obj_json["attrs"] \
            and obj_json["attrs"]["Type"] in constants.EXTERNAL_SOURCE_TYPES \
            and "source_file_name" in obj_json["attrs"]:
        source_file_name = obj_json["attrs"]["source_file_name"]
        # Remove the bookkeeping key so it is not emitted as an XML attr.
        del obj_json["attrs"]["source_file_name"]
        with open_file(os.path.join(path, source_file_name)) as source_file:
            # NOTE(review): .decode('utf-8') implies clean_data returns
            # bytes here, yet write_attributes uses its result as text
            # directly -- confirm clean_data's return type.
            obj_json["attributes"]["source"] = clean_data(source_file.read()).decode('utf-8')
    write_xml("Object", attrs=obj_json["attrs"], indent=indent)
    # Actions/Objects are emitted as empty containers at this level.
    write_xml("Actions", indent=indent+2, data="", close=True)
    write_xml("Objects", indent=indent+2, data="", close=True)
    write_attributes(obj_json["attributes"], indent+2)
    write_xml("Object", indent=indent, closing=True)
def end(self):
    """Finalize the application-information section.

    Cleans and encodes the collected data, derives the script file
    extension from the ScriptingLanguage field, saves, and delegates to
    the base handler.
    """
    global ACTION_EXT
    # remove unnecessary symbols from data and encode it
    for key, value in self.data.items():
        self.data[key] = encode(clean_data("".join(value)))
    # detect application programming language
    # BUGFIX: the fallback for unknown languages was "python" -- the
    # language name, not an extension -- producing files like
    # "name" + "python". Default to ".py" instead.
    ACTION_EXT = {
        "python": ".py",
        "vscript": ".vb"
    }.get(self.data["ScriptingLanguage"].lower(), ".py")
    INFO("Scripts extension will be '*%s'", ACTION_EXT)
    INFO("Completed: Application Information")
    self.save()
    super(InformationTagHandler, self).end()
def main():
    """Project each runner to the finish line and report the winner.

    Usage: python race.py <datafile.csv>
    """
    if len(sys.argv) < 2:
        print("Usage: python race.py <datafile.csv>")
        sys.exit()

    runners = helpers.clean_data(helpers.import_data(sys.argv[1]))

    stats = []
    for runner in runners:
        # Distance still to cover from the last recorded sample.
        remaining = LENGTH - runner["Distance"]
        end_velocity = helpers.final_velocity(
            runner["Acceleration"], remaining, runner["Velocity"])
        total_time = runner["Time"] + helpers.time(
            remaining, runner["Velocity"], end_velocity)
        stats.append({
            "Person": runner["Person"],
            "Time": total_time,
            "AvgV": LENGTH / total_time,
            "FinalV": end_velocity
        })

    helpers.print_winner(stats)
    helpers.export_data(stats, "stats.csv")
# Build one flat feature vector (x, y per body part) for each detected human.
array_humans = []
for human in humans:
    array_human = []
    # Body parts 0..15: record coordinates; a missing part raises
    # KeyError and is recorded as (0.0, 0.0).
    for part_idx in range(16):
        try:
            part = human.body_parts[part_idx]
            array_human.extend((part.x, part.y))
        except KeyError:
            array_human.extend((0.0, 0.0))
    # Drop feature vectors the cleaner flags as unclear (returns None).
    if helpers.clean_data(array_human) is not None:
        array_humans.append(array_human)

# Count the number of humans in each posture category in the video.
if len(array_humans) > 0:
    sitting, standing, laying = helpers.count_predictions(clf.predict(array_humans))
    currentDT = datetime.datetime.now()
    # Send the gathered data to the cloud platform.
    # The following code will cause the program to crash if you do not
    # have Sapient and sapient-server running when you execute it.
    # Uncomment it if you wish to run locally.
    # ------------------------------------------------------------------#
# ----------------------------------------------------------------------Imports------------------------------------------------------------------------------ import pandas as pd from helpers import (clean_data, get_pearson, get_max, get_min, scatter_plot, p_values_paired, p_values_ind, compare) # ----------------------------------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------Load the data--------------------------------------------------------------------------- healthy = pd.read_csv('lusc-rsem-fpkm-tcga_paired.txt', sep='\t') cancerous = pd.read_csv('lusc-rsem-fpkm-tcga-t_paired.txt', sep='\t') # ----------------------------------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------Clean the data-------------------------------------------------------------------------- clean_data(healthy, cancerous, 25) # ----------------------------------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------Get Pearson CC-------------------------------------------------------------------------- correlation, indexes = get_pearson(healthy, cancerous) # ----------------------------------------------------------------------------------------------------------------------------------------------------------- # -----------------------------------------------------------------Get Max/Min Index------------------------------------------------------------------------- max_key, max_value, max_index = get_max(correlation, indexes) # 
# Plot the maximum-correlation gene: healthy vs cancerous expression.
scatter_plot(healthy, cancerous, max_index[0], xlabel='H_Expression_Level',
             ylabel='C_Expression_Level', title='Maximum Correlation Gene')