Exemplo n.º 1
0
    def save(self):
        """Persist this object's attrs/attributes to disk as JSON.

        If the object carries external source code, that code is written to
        its own file first and replaced by a ``source_file_name`` reference
        in the JSON payload.
        """
        uses_info_file = self.has_folder or self.is_actions_found
        name = (constants.INFO_FILE if uses_info_file
                else "{}.json".format(self.attrs["Name"]))

        # Externalize the source body for known external-source object types.
        if ("Type" in self.attrs
                and self.attrs["Type"] in constants.EXTERNAL_SOURCE_TYPES
                and "source" in self.attributes):
            extension = constants.EXTERNAL_SOURCE_TYPES[self.attrs["Type"]]
            source_name = name + extension
            PARSER.write_file(source_name, "".join(self.attributes["source"]))
            self.attrs["source_file_name"] = source_name
            del self.attributes["source"]

        # Normalize every attribute: join fragments, clean, encode, split lines.
        self.attributes = {
            attr: encode(clean_data("".join(chunks))).split('\n')
            for attr, chunks in self.attributes.items()
        }

        payload = OrderedDict([
            ("attrs", sort_dict(self.attrs)),
            ("attributes", sort_dict(self.attributes)),
        ])

        serialized = json.dumps(payload, indent=4)
        detect_guids(serialized)

        PARSER.write_file(name, serialized)

        if self.childs_order:
            order_json = json.dumps(self.childs_order, indent=4)
            detect_guids(order_json)
            PARSER.write_file(constants.CHILDS_ORDER, order_json)

        # Objects stored in their own folder pushed a path segment earlier;
        # balance it here.
        if uses_info_file:
            PARSER.pop_from_current_path()
Exemplo n.º 2
0
    def child_end(self, tagname):
        """Finalize the current node when a child tag of Events/Actions closes."""
        state = (self.current_mode, tagname)

        if state == ("Events", "Event"):
            page = PARSER.pages.get(self.current_node["ContainerID"], "")
            if page:
                page["events"].append(sort_dict(self.current_node))
                # Register every referenced action id on the page as an
                # empty stub (filled in later when the action is parsed).
                for action_id in self.current_node["actions"]:
                    if not page["actions"].get(action_id, ""):
                        page["actions"][action_id] = ""
            self.current_node = ""

        elif state == ("Events", "Events"):
            self.current_mode = ""

        elif state == ("Actions", "Action"):
            # Drop an empty Params list entirely rather than serializing it.
            if not self.current_node["Params"]:
                del self.current_node["Params"]
            self.actions[self.current_node["ID"]] = sort_dict(self.current_node)
            self.current_node = ""

        elif state == ("Actions", "Actions"):
            self.current_mode = ""

        elif state == ("Actions", "Parameter"):
            self.is_data_allowed = False
            # Collapse the accumulated text fragments of the last parameter.
            last_param = self.current_node["Params"][-1]
            last_param[1] = clean_data("".join(last_param[1]))
Exemplo n.º 3
0
def main(file: str) -> None:
    """Load the CSV dataset named *file* into its Postgres table.

    Looks up table name, source file and DDL query in ``schema.DATA_SCHEMA``,
    optionally pre-cleans the ``products`` dataset, then bulk-loads the CSV
    via ``COPY`` (header row skipped).
    """
    logging.info(f'Loading {file} into Postgres database.')

    # Schema details for this dataset.
    schema_info = schema.DATA_SCHEMA[file]
    table_name = schema_info['tablename']
    file_name = schema_info['filename']
    query = schema_info['query']

    # Clean data: the products file needs preprocessing before COPY.
    if file == 'products':
        temp_df = pd.read_csv(file_name)
        df = helpers.clean_data(temp_df)
        df.to_csv(f'tempdata/{file}.csv', index=False)
        file_name = f'tempdata/{file}.csv'

    logging.info(f'Inserting data into {table_name} table.')
    logging.info(config.CREDENTIALS)
    conn = psycopg2.connect(host=config.CREDENTIALS['host'],
                            database=config.CREDENTIALS['database'],
                            user=config.CREDENTIALS['user'])
    logging.info(conn)

    # BUG FIX: the cursor and connection were previously never closed,
    # leaking a server connection per call. Ensure cleanup on all paths.
    try:
        cur = conn.cursor()
        try:
            cur.execute(query)  # (re)create the target table
            conn.commit()
            with open(file_name, 'r') as f:
                next(f)  # skip the CSV header row
                cur.copy_from(f, table_name, sep=',')
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
Exemplo n.º 4
0
def write_attributes(attributes, indent):
    """Emit an <Attributes> XML section with one <Attribute> per entry.

    List values are flattened to a single newline-joined string before
    being cleaned and encoded.
    """
    write_xml("Attributes", indent=indent)
    child_indent = indent + 2
    for name, raw in attributes.items():
        text = "\n".join(raw) if isinstance(raw, list) else raw
        write_xml(
            "Attribute",
            attrs={"Name": name},
            data=clean_data(encode(text)),
            indent=child_indent,
            close=True
        )
    write_xml("Attributes", indent=indent, closing=True)
Exemplo n.º 5
0
def write_object(path, name, indent):
    """Convert one serialized object JSON file back into XML elements."""
    with open_file(os.path.join(path, name)) as obj_file:
        obj_json = json_load(obj_file, critical=True)

    attrs = obj_json["attrs"]
    has_external_source = (
        "Type" in attrs
        and attrs["Type"] in constants.EXTERNAL_SOURCE_TYPES
        and "source_file_name" in attrs
    )
    if has_external_source:
        # Inline the externalized source file back as a "source" attribute;
        # the file-name reference itself is not serialized to XML.
        source_file_name = attrs.pop("source_file_name")
        with open_file(os.path.join(path, source_file_name)) as source_file:
            cleaned = clean_data(source_file.read())
            obj_json["attributes"]["source"] = cleaned.decode('utf-8')

    write_xml("Object", attrs=attrs, indent=indent)
    write_xml("Actions", indent=indent + 2, data="", close=True)
    write_xml("Objects", indent=indent + 2, data="", close=True)
    write_attributes(obj_json["attributes"], indent + 2)
    write_xml("Object", indent=indent, closing=True)
Exemplo n.º 6
0
    def end(self):
        """Finalize application info: encode collected data and detect the
        scripting language, publishing the script file extension globally.
        """
        global ACTION_EXT

        # Remove unnecessary symbols from data and encode it.
        for key, value in self.data.items():
            self.data[key] = encode(clean_data("".join(value)))

        # Detect the application programming language.
        # BUG FIX: the fallback was "python" (a language name, not an
        # extension), so unknown languages produced files named "*python";
        # default to the Python extension ".py" instead.
        ACTION_EXT = {
            "python": ".py",
            "vscript": ".vb"
        }.get(self.data["ScriptingLanguage"].lower(), ".py")

        INFO("Scripts extension will be '*%s'", ACTION_EXT)
        INFO("Completed: Application Information")

        self.save()
        super(InformationTagHandler, self).end()
Exemplo n.º 7
0
def main():
    """Simulate each racer covering the remaining distance and report stats.

    Reads race rows from the CSV given on the command line, computes each
    racer's final velocity, total time and average velocity, prints the
    winner and exports the results to stats.csv.
    """
    if len(sys.argv) < 2:
        print("Usage: python race.py <datafile.csv>")
        sys.exit()

    raw_rows = helpers.import_data(sys.argv[1])
    rows = helpers.clean_data(raw_rows)

    stats = []
    for entry in rows:
        remaining = LENGTH - entry["Distance"]
        vf = helpers.final_velocity(entry["Acceleration"], remaining,
                                    entry["Velocity"])
        total_time = entry["Time"] + helpers.time(remaining,
                                                  entry["Velocity"], vf)
        stats.append({
            "Person": entry["Person"],
            "Time": total_time,
            "AvgV": LENGTH / total_time,
            "FinalV": vf
        })

    helpers.print_winner(stats)
    helpers.export_data(stats, "stats.csv")
Exemplo n.º 8
0
            array_humans = []
            # Build one feature vector per detected human in the frame.
            for human in humans:
                array_human = []
                # 16 body parts -> 32 features (x, y per part).
                for i in range(0, 16):
                    try:
                        # Record the limb coordinates.
                        array_human.append(human.body_parts[i].x)
                        array_human.append(human.body_parts[i].y)
                        # Missing body part: pad with zeros so every vector
                        # keeps a fixed length of 32 values.
                    except KeyError:
                        array_human.append(0.0)
                        array_human.append(0.0)
                # Drop unclear data points; clean_data returning None marks a
                # vector as unusable (presumably too many missing parts —
                # TODO confirm against helpers.clean_data).
                if helpers.clean_data(array_human) is None:
                    array_human = []
                else:
                    array_humans.append(array_human)
                    array_human = []

            # Classify every retained human and tally posture categories.
            if len(array_humans) > 0:
                sitting, standing, laying = helpers.count_predictions(clf.predict(array_humans))

                currentDT = datetime.datetime.now()
                # Send the gathered data to the cloud platform.
                # NOTE: the code below crashes unless Sapient and
                # sapient-server are running; keep it commented out to run
                # locally, uncomment to enable cloud upload.
                #--------------------------------------------------------------------------------------------------------#
# ----------------------------------------------------------------------Imports------------------------------------------------------------------------------
import pandas as pd
from helpers import (clean_data, get_pearson, get_max, get_min, scatter_plot,
                     p_values_paired, p_values_ind, compare)
# NOTE(review): get_min, p_values_paired, p_values_ind and compare are not
# used in this chunk — they may be used further down the file.
# -----------------------------------------------------------------------------------------------------------------------------------------------------------

# -------------------------------------------------------------------Load the data---------------------------------------------------------------------------
# Paired healthy vs. cancerous lung (LUSC) RSEM-FPKM expression tables (TCGA).
healthy = pd.read_csv('lusc-rsem-fpkm-tcga_paired.txt', sep='\t')
cancerous = pd.read_csv('lusc-rsem-fpkm-tcga-t_paired.txt', sep='\t')
# -----------------------------------------------------------------------------------------------------------------------------------------------------------

# -------------------------------------------------------------------Clean the data--------------------------------------------------------------------------
# Return value is discarded, so clean_data presumably mutates both frames
# in place; 25 looks like a filtering threshold — verify in helpers.
clean_data(healthy, cancerous, 25)
# -----------------------------------------------------------------------------------------------------------------------------------------------------------

# -------------------------------------------------------------------Get Pearson CC--------------------------------------------------------------------------
# Per-gene Pearson correlation between the paired healthy/cancerous samples.
correlation, indexes = get_pearson(healthy, cancerous)
# -----------------------------------------------------------------------------------------------------------------------------------------------------------

# -----------------------------------------------------------------Get Max/Min Index-------------------------------------------------------------------------
max_key, max_value, max_index = get_max(correlation, indexes)
# -----------------------------------------------------------------------------------------------------------------------------------------------------------

# -----------------------------------------------------------------Plot Max/Min Gene-------------------------------------------------------------------------
# Scatter plot of the single most-correlated gene across the two conditions.
scatter_plot(healthy,
             cancerous,
             max_index[0],
             xlabel='H_Expression_Level',
             ylabel='C_Expression_Level',
             title='Maximum Correlation Gene')
# -----------------------------------------------------------------------------------------------------------------------------------------------------------