def handleBrowseData(self):
    """ Browse and read the input csv file.

    Resets all attribute state, asks the user for a csv file, then rebuilds
    the attribute list, the attribute info dict and the right-side widgets.
    """
    # set old one to null
    self.csv_reader = None
    self.attr_type = None
    self.attrs = {}
    self.lstItems.clear()
    self.lstEnumUnique.clear()
    self.field_type = None
    filenames = getFileFromDialog()
    # bug fix: the user may cancel the dialog; indexing an empty selection
    # raised IndexError.  Keep the freshly-cleared state and bail out.
    if not filenames:
        return
    self.teBrowseData.setText(filenames[0])
    # creating csv reader
    self.csv_path = filenames[0]
    self.csv_reader = CsvReader(self.csv_path)
    # adding list items on left side
    for col in self.csv_reader.get_col_pd_names():
        self.lstItems.addItem(col)
    # select first row if there are elements in list
    if self.lstItems.count() > 0:
        self.lstItems.setCurrentRow(0)
        self.attr_type = self.lstItems.currentItem().text()
    self.refresh_enumerated_list_values()
    self.populate_attributes_data()
    self.showCurrentAttributeDataInWidgets()
def main():
    """Load 'Monitoring report.csv' and store its rows in a SQLite
    'monitoring' table, aborting early on any setup failure."""
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    db = Store()
    reader = CsvReader()
    data = reader.get_data_from_csv('Monitoring report.csv')
    if data is None:
        # bug fix: the bare name `exit` (not called) was a no-op, so
        # execution used to fall through on every error path
        return
    conn = db.create_connection(db_file='monitoring-report.db')
    if conn is None:
        return
    # Create the monitoring table
    sql_create_table = "CREATE TABLE IF NOT EXISTS monitoring(date, energy, reactive_energy, power, maximeter, reactive_power, voltage, intensity, power_factor)"
    if db.create_table(conn, sql_create_table) is None:
        return
    values = [row for index, row in data.iterrows()]
    db.insert_data(conn, values)
    conn.close()
def import_seg_dups(db):
    """Rebuild hw.seg_dups from the UCSC segmental duplications TSV and
    index it on chrom, chromStart, chromEnd, xstart and xend."""
    db.hw.seg_dups.drop()
    segmental_dup = CsvReader(USCS_SEGMENTAL_DUP_TSV, delimiter='\t')
    kept = []
    for doc in segmental_dup.data:
        # strip the leading 'chr' prefix from both chromosome columns
        doc['chrom'] = doc['chrom'][3:]
        doc['otherChrom'] = doc['otherChrom'][3:]
        # keep only primary chromosomes (stripped name shorter than 3 chars)
        if len(doc['chrom']) >= 3:
            continue
        doc['xstart'] = get_xpos(doc['chrom'], doc['chromStart'])
        doc['xend'] = get_xpos(doc['chrom'], doc['chromEnd'])
        kept.append(doc)
    segmental_dup.data = kept
    segmental_dup.import_to_db(db.hw, 'seg_dups')
    for field in ('chrom', 'chromStart', 'chromEnd', 'xstart', 'xend'):
        db.hw.seg_dups.create_index([(field, pymongo.ASCENDING)],
                                    name=field + '_1')
def load(self, path):
    """Read a PLY file at *path* and cache its geometry.

    Stores vertices and triangles as numpy arrays plus their counts.
    """
    ply_reader = CsvReader(path)
    vertices, triangles = ply_reader.read_ply()
    self.list_vertex = np.array(vertices)
    self.list_triangles = np.array(triangles)
    self.num_vertex = len(self.list_vertex)
    self.num_triangles = len(self.list_triangles)
def __init__(self, path=None):
    """Load the DOI reference table and index its records.

    Builds two lookup maps over the parsed rows: one keyed by 'lastmod'
    and one keyed by 'pid'.  Falls back to the default table path when
    *path* is not given.
    """
    if path is None:
        path = "/Users/ostwald/devel/opensky/pubs_to_grants/ARTICLES_award_id_data/DOI_Reference_TABLE.csv"
    self.last_mod_map = {}
    self.pid_map = {}
    CsvReader.__init__(self, path)
    # later records with the same key overwrite earlier ones, as before
    self.last_mod_map.update((rec['lastmod'], rec) for rec in self.data)
    self.pid_map.update((rec['pid'], rec) for rec in self.data)
def run():
    """Render one award certificate image per CSV row and bundle the images
    into PDFs of at most `images_per_pdf` pages each."""
    config = utils.load_cfg('conf.cfg')
    template_path = config.get('image', 'filename')
    markers = {'school': config.getint('markers', 'school'),
               'year': config.getint('markers', 'year'),
               'level': config.getint('markers', 'level'),
               'award': config.getint('markers', 'award'),
               'recipient': config.getint('markers', 'recipient')}
    font = {'color': config.get('font', 'color'),
            'name': config.get('font', 'name'),
            'size': config.getint('font', 'size')}
    images_per_pdf = config.getint('pdf', 'images_per_pdf')
    if images_per_pdf > 6:
        # 6 is the maximum allowed number of images per pdf
        exit()
    csv_file = config.get('csv', 'filename')
    cc = CsvReader(csv_file)
    csv_data = cc.read()
    school = 'John Scottus School'
    year = '2016'
    image_folder = utils.create_folder('images')
    pdf_folder = utils.create_folder('pdfs')
    count = 0
    images = []
    draw_tool = ImageWriter(font)
    for row in csv_data:
        im = draw_tool.open_image(template_path)
        im = draw_tool.write_text(im, markers['school'], school)
        im = draw_tool.write_text(im, markers['year'], year)
        im = draw_tool.write_text(im, markers['level'], row['Level'])
        im = draw_tool.write_text(im, markers['award'], row['Award'])
        im = draw_tool.write_text(im, markers['recipient'], row['Recipient'])
        im_path = image_folder + '/' + 'image_' + utils.timestamp() + '.png'
        draw_tool.save_image(im, im_path)
        count += 1
        images.append(im_path)
        if count % images_per_pdf == 0:
            create_pdf(pdf_folder, images)
            count = 0
            images = []
    # bug fix: only flush a non-empty remainder; previously an empty PDF was
    # requested whenever the row count was an exact multiple of images_per_pdf
    if images:
        create_pdf(pdf_folder, images)
def __init__(self, path, filter_args={}, sort_args={}): self.sort_spec = SortSpec(**sort_args) self.filter_spec = FilterSpec(**filter_args) CsvReader.__init__(self, path) print '{} records before filtering'.format(len(self.data)) # print 'filter spec: {}'.format(self.filter_spec.spec) if filter_args is not None: self.filter_data() if self.sort_spec is not None: self.data.sort(key=lambda x: x[self.sort_spec.spec['field']]) if self.sort_spec.spec['reverse']: self.data.reverse()
def import_csv_data(self, filename: str) -> None:
    """Load strap rows from *filename* and add each one to the table.

    Each row is expected to carry name, quantity, colour and company code
    in its first four columns.
    """
    reader = CsvReader(filename)
    for row in reader.get_data():
        strap = Strap(row[0], row[1], row[2], row[3])
        self._add_strap_to_table(strap)
def get_org_units(self):
    """Build OrgUnit objects from the configured csv file and return the
    accumulated OrgUnit.organization_units as a JSON array string.

    Note: appends to the class-level OrgUnit.organization_units list, so
    repeated calls keep accumulating (pre-existing behavior).
    """
    with open(OrgUnit.config['csv_file'], 'r') as csvFile:
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        # presumably maps column index -> attribute name; verify against
        # CsvReader.read_csv_data
        data = CsvReader.read_csv_data()
        for row in file_reader:
            # bug fix: build a fresh dict per row — the old single shared
            # dict leaked/overwrote values between rows
            attributes = {}
            for index in data:
                attributes[data[index]] = row[index]
            org_unit = OrgUnit(attributes)
            OrgUnit.organization_units.append(org_unit.__dict__)
    # json.dumps replaces the error-prone manual '[' + ... + ']' string
    # concatenation; output is equivalent JSON
    return json.dumps(OrgUnit.organization_units)
def get_org_groups(self):
    """Return the distinct org unit groups found in the configured csv file,
    serialized as a JSON array of OrgUnitGroup dicts."""
    with open(CsvReader.config['csv_file'], 'r') as csvFile:
        data = []      # group identifiers already seen (de-duplication)
        data_vol = []  # OrgUnitGroup.__dict__ per distinct identifier
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        code_key = CsvReader.get_key_code('code')
        #name_key = CsvReader.get_key_code('name')
        # column index of the group identifier: taken from the first value of
        # the configured 'org_groups' mapping (Python 2 dict iteration —
        # order depends on the dict, TODO confirm a single-entry config)
        identifier_key = CsvReader.get_key_code(Configuration.config['org_groups'].itervalues().next())
        for row in file_reader:
            if row[identifier_key] not in data:
                data.append(row[identifier_key])
                orgGroup = OrgUnitGroup(row[code_key],row[identifier_key])
                data_vol.append(orgGroup.__dict__)
        return json.dumps(data_vol)
def write_doi_listing(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) fp = open(out_path, 'w') for rec in reader.data: fp.write(rec['doi'] + '\n') fp.close() print 'wrote {} dois to {}'.format(len(reader.data), os.path.basename(out_path))
def import_mouse_het_lethal_knockout_genes(db):
    """Rebuild db.gevir.mouse_het_lethal with two documents: the human gene
    names and canonical transcript ids whose mouse homolog knockouts are
    annotated in the het-lethal TSV."""
    # mouse gene symbol -> human gene symbol lookup
    mouse_to_human_gene_names = {}
    mouse_to_human = CsvReader(MOUSE_TO_HUMAN_TSV, delimiter='\t')
    for document in mouse_to_human.data:
        mouse_to_human_gene_names[
            document['mouse_gene_name']] = document['human_gene_name']
    gene_names_het_lethal = set([])
    gene_transcripts_het_lethal = set([])
    mouse_abnormal_survival = CsvReader(MOUSE_HET_LETHAL_TSV, delimiter='\t')
    for document in mouse_abnormal_survival.data:
        mouse_gene_name = document['Ontology Annotation Subject . Symbol']
        # NOTE(review): zygosity and phenotype are read but never used —
        # no filtering on them happens here
        mouse_zygosity = document['Subject Zygosity']
        mouse_phenotype = document['Ontology Annotation Term Name']
        # Ignore genes for which there are no human homologs
        if mouse_gene_name not in mouse_to_human_gene_names:
            continue
        human_gene_name = mouse_to_human_gene_names[mouse_gene_name]
        transcript_id = get_gene_canonical_transcript_by_name(
            db, human_gene_name)
        # Ignore genes which names were not found in ExAC
        if not transcript_id:
            continue
        exac_gene = db.exac.genes.find_one(
            {'canonical_transcript': transcript_id})
        gene_name = exac_gene['gene_name']
        gene_names_het_lethal.add(gene_name)
        gene_transcripts_het_lethal.add(transcript_id)
    db.gevir.mouse_het_lethal.drop()
    db.gevir.mouse_het_lethal.insert({
        '_id': 'gene_names',
        'data': list(gene_names_het_lethal)
    })
    db.gevir.mouse_het_lethal.insert({
        '_id': 'transcript_ids',
        'data': list(gene_transcripts_het_lethal)
    })
def write_pid2doi_json(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) json_data = {} for rec in reader.data: json_data[rec['pid']] = rec['doi'] fp = open(out_path, 'w') fp.write(json.dumps(json_data, indent=4)) fp.close() print 'wrote {} mappings to {}'.format(len(reader.data), os.path.basename(out_path))
def __init__(self, code, name, orgUnits=None):
    """Org unit group identified by *code*/*name*.

    Scans the configured csv file and collects the 'code' cell of every
    row that mentions *name*, storing the result in organisationUnits.
    """
    self.code = code
    self.name = name
    self.shortName = name
    matches = []
    with open(CsvReader.config['csv_file'], 'r') as csv_source:
        for row in csv.reader(csv_source):
            if self.name in row:
                matches.append({'code': row[CsvReader.get_key_code('code')]})
    self.organisationUnits = matches
def __init__(self, code, name, organisationUnitGroups=None):
    """Org unit group set built from the configured csv file: collects the
    'code' cell of each row with a not-yet-seen group identifier."""
    self.code = code
    self.name = name
    with open(Configuration.config['csv_file'], 'r') as csvFile:
        data = []      # identifiers already seen (de-duplication)
        data_vol = []  # {'code': ...} per accepted row
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        code_key = CsvReader.get_key_code('code')
        #name_key = CsvReader.get_key_code('name')
        # column indices come from the first values of the configured
        # mappings (Python 2 dict iteration)
        group_identifier_key = CsvReader.get_key_code(
            Configuration.config['org_groups'].itervalues().next())
        group_set_identifier_key = CsvReader.get_key_code(
            Configuration.config['org_group_sets'].itervalues().next())
        for row in file_reader:
            # NOTE(review): both guards test membership in the SAME `data`
            # list, yet only the group identifier is ever appended to it —
            # confirm the second check is intentional and not a copy/paste
            # slip for a separate seen-set of group-set identifiers
            if row[group_identifier_key] not in data:
                if row[group_set_identifier_key] not in data:
                    data.append(row[group_identifier_key])
                    data_vol.append({'code': row[code_key]})
        self.organisationUnitGroups = data_vol
def import_tandem_repeats(db):
    """Rebuild hw.tandem_repeats: one document (_id = xpos) per genomic
    position covered by any UCSC tandem repeat, bulk-inserted in batches."""
    db.hw.tandem_repeats.drop()
    tandem_repeats = CsvReader(USCS_TANDEM_REPEATS_TSV, delimiter='\t')
    pos_repeats = set([])
    total_lines = len(tandem_repeats.data)
    line_number = 0
    bar = progressbar.ProgressBar(maxval=1.0).start()
    for document in tandem_repeats.data:
        # added for patch chroms like chr19_gl000209_random
        if len(document['chrom']) > 5:
            continue
        chrom = document['chrom'][3:]
        start = document['chromStart']
        end = document['chromEnd']
        xstart = get_xpos(chrom, start)
        xend = get_xpos(chrom, end)
        for xpos in range(xstart, xend + 1):
            pos_repeats.add(xpos)
        line_number += 1
        bar.update((line_number + 0.0) / total_lines)
    bar.finish()
    total_lines = len(pos_repeats)
    line_number = 0
    bar = progressbar.ProgressBar(maxval=1.0).start()
    bulk = db.hw.tandem_repeats.initialize_unordered_bulk_op()
    counter = 0
    for xpos in pos_repeats:
        bulk.insert({'_id': xpos})
        counter += 1
        if counter % 500 == 0:
            bulk.execute()
            # bug fix: recreate an *unordered* bulk op — the original
            # switched to initialize_ordered_bulk_op after the first batch,
            # silently changing the insert semantics mid-run
            bulk = db.hw.tandem_repeats.initialize_unordered_bulk_op()
        line_number += 1
        bar.update((line_number + 0.0) / total_lines)
    # flush the final partial batch (a full batch was already executed above)
    if counter % 500 != 0:
        bulk.execute()
    bar.finish()
def read_clin_var_cites():
    """Read the ClinVar citations TSV and return a mapping of
    allele id -> set of citation ids."""
    clin_vars = CsvReader(CLIN_VAR_CITES_TSV, delimiter='\t')
    total = len(clin_vars.data)
    bar = progressbar.ProgressBar(maxval=1.0).start()
    cites = {}
    for row_num, document in enumerate(clin_vars.data, start=1):
        # group citation ids under their allele id
        cites.setdefault(document['#AlleleID'], set()).add(
            document['citation_id'])
        bar.update((row_num + 0.0) / total)
    bar.finish()
    return cites
def write_doi2date_json(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) json_data = {} in_fmt = '%m/%d/%y' out_fmt = '%Y-%m-%d' for rec in reader.data: pubdate = rec['pub_date'] try: date_val = time.strftime(out_fmt, time.strptime(pubdate, in_fmt)) except: print "ERROR: could not parse {}: {}".format( pubdate, sys.exc_info()[1]) date_val = '' json_data[rec['doi']] = date_val fp = open(out_path, 'w') fp.write(json.dumps(json_data, indent=4)) fp.close() print 'wrote {} mappings to {}'.format(len(reader.data), os.path.basename(out_path))
def import_uneecon_g(db):
    """Load UNEECON-G gene scores into db.gevir.uneecon_genes and index the
    collection on gene_id, gene_name and uneecon_g."""
    uneecon_g_genes = []
    uneecon_g = CsvReader(UNEECON_G_TSV, delimiter='\t')
    for document in uneecon_g.data:
        # '#gene' holds 'ENSG....version|symbol'
        gene_info = document['#gene']
        gene_id, gene_name = gene_info.split('|')
        gene_id = gene_id.split('.')[0]  # drop the Ensembl version suffix
        uneecon_score = float(document['UNEECON-G'])
        exac_gene = db.exac.genes.find_one({'gene_id': gene_id})
        # bug fix: skip genes unknown to ExAC — find_one returns None and
        # subscripting it raised TypeError (sibling importers guard the
        # same lookup pattern)
        if not exac_gene:
            continue
        transcript_id = exac_gene['canonical_transcript']
        uneecon_g_gene = Uneecon_G(transcript_id, gene_id, gene_name,
                                   uneecon_score)
        uneecon_g_genes.append(uneecon_g_gene.get_dictionary())
    db.gevir.uneecon_genes.drop()
    db.gevir.uneecon_genes.insert_many(uneecon_g_genes)
    db.gevir.uneecon_genes.create_index([('gene_id', pymongo.ASCENDING)],
                                        name='gene_id_1')
    db.gevir.uneecon_genes.create_index([('gene_name', pymongo.ASCENDING)],
                                        name='gene_name_1')
    db.gevir.uneecon_genes.create_index([('uneecon_g', pymongo.ASCENDING)],
                                        name='uneecon_g_1')
class Window(QMainWindow):
    """ Main GUI class for application.

    Loads the Qt UI from app.ui, wires every widget signal to a handler,
    and keeps per-column AttributeInfo state in `attrs`, keyed by the csv
    column name currently selected in the left-hand list.
    """
    # path of the csv file currently loaded (None until Browse is used)
    csv_path = None
    #
    # dict for storing attibutes data
    # as associative dict of col/attr name and AttributeInfo
    #
    attrs = {}

    def __init__(self):
        QWidget.__init__(self)
        # loaind ui from xml
        uic.loadUi(os.path.join(DIRPATH, 'app.ui'), self)
        # button event handlers
        self.btnXmlTemplate.clicked.connect(self.handleXmlTemplate)
        self.btnBrowseData.clicked.connect(self.handleBrowseData)
        self.btnSave.clicked.connect(self.handleSave)
        self.btnSaveClose.clicked.connect(self.handleSaveClose)
        self.btnCancel.clicked.connect(self.handleCancel)
        # attribute definition and definition source change event handlers
        self.teAttributeDef1.textChanged.connect(
            self.handleAttributeDefChanged1)
        self.teAttributeSource1.textChanged.connect(
            self.handleAttributeSourceChanged1)
        self.teAttributeDef2.textChanged.connect(
            self.handleAttributeDefChanged2)
        self.teAttributeSource2.textChanged.connect(
            self.handleAttributeSourceChanged2)
        self.teAttributeDef3.textChanged.connect(
            self.handleAttributeDefChanged3)
        self.teAttributeSource3.textChanged.connect(
            self.handleAttributeSourceChanged3)
        # ENUMERATED data change event handlers
        self.teEnumDef.textChanged.connect(self.enumDefinitionChanged)
        self.teEnumSrc.textChanged.connect(self.enumSourceChanged)
        self.lstEnumUnique.currentItemChanged.connect(self.enumUniqueChanged)
        # RANGE min, max, unit, resolution data change event handlers
        self.teRangeMin.textChanged.connect(self.handleRangeMinChanged)
        self.teRangeMax.textChanged.connect(self.handleRangeMaxChanged)
        self.teUnit.textChanged.connect(self.handleUnitChanged)
        self.teResolution.textChanged.connect(self.handleResolutionChanged)
        # CODESET name and source change event handler
        self.teCodesetName.textChanged.connect(self.handleCodesetNameChanged)
        self.teCodesetSource.textChanged.connect(
            self.handleCodesetSourceChanged)
        # UNREPRESENTABLE desc filed change event handler
        self.teUnreprDesc.textChanged.connect(
            self.handleUnreprensentableChanged)
        # radio buttons signal handlers
        self.rbEnumerated.toggled.connect(self.fieldTypeChanged)
        self.rbRange.toggled.connect(self.fieldTypeChanged)
        self.rbCodeset.toggled.connect(self.fieldTypeChanged)
        self.rbUnrepresentable.toggled.connect(self.fieldTypeChanged)
        # convenience dict of radio button names and objects
        self.dict_rbs = {
            ENUMERATED: self.rbEnumerated,
            RANGE: self.rbRange,
            CODESET: self.rbCodeset,
            UNREPRESENTABLE: self.rbUnrepresentable
        }
        # adding listener for left side list's item changed
        self.lstItems.currentItemChanged.connect(self.attributeChanged)
        # gui setup
        self.setupGui()

    def setupGui(self):
        """ Initialize gui and defualt values etc """
        # default initialization of GUI
        self.lblMeasurementDesc.setText(TXT_MEASUREMENT_DESC)
        # setting desc for domain detail labels for dunamic widgets
        self.lblEnumeratedDomain.setText(TXT_ENUMERATED_DOMAIN)
        self.lblRangeDomain.setText(TXT_RANGE_DOMAIN)
        self.lblCodesetDomain.setText(TXT_CODESET_DOMAIN)
        self.lblUnrepresentableDomain.setText(TXT_UNPRESENTABLE_DOMAIN)
        # default field type select in ui is 'enumerated'
        self.field_type = ENUMERATED
        # default field type is 'enumerated'
        self.rbEnumerated.setChecked(True)
        # overview text
        self.teOverview.setText(TXT_OVERVIEW)
        # citation text
        self.teCitation.setText(TXT_CITATION)

    def ensureAttributeType(self):
        """ Keep self.attr_type in sync with the left-hand list selection,
        selecting the first row when nothing is selected. """
        if not self.lstItems.count():
            self.attr_type = None
            return
        if not self.lstItems.currentItem():
            self.lstItems.setCurrentRow(0)
        self.attr_type = self.lstItems.currentItem().text()

    def attributeChanged(self):
        """ Function dynamically shows widgets on right side of tab 2
        Whenever an item is selected/changed on lest side list.
        Radio buttons on right side are automatically selected, depending
        upon the values of attribute's column in csv file """
        self.ensureAttributeType()
        if not self.attr_type:
            print("[WARNING] attributeChanged() :: not attr type !!!")
            return
        self.refresh_enumerated_list_values()
        ################################################
        #                                              #
        # Enable/disable radio buttons on right side   #
        # depending upon the data of select attribute  #
        # associated with a column in csv file         #
        #                                              #
        ################################################
        if not self.csv_reader:
            print("[WARNING] :: No csv loaded to read values")
            return
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] attributeChanged() :: attributes dict is not initialized"
            )
            # first time - default vals
            self.teAttributeDef1.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource1.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            self.teAttributeDef2.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource2.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            self.teAttributeDef3.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource3.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            return
        ####################################################
        #                                                  #
        # Fill UI with information from selected attribute #
        #                                                  #
        ####################################################
        attr = self.attrs[self.attr_type]
        # update text fields for attribute definition and definition source
        # from values in the current attribute
        self.teAttributeDef1.setText(attr.defn1)
        self.teAttributeSource1.setText(attr.dsrc1)
        self.teAttributeDef2.setText(attr.defn2)
        self.teAttributeSource2.setText(attr.dsrc2)
        self.teAttributeDef3.setText(attr.defn3)
        self.teAttributeSource3.setText(attr.dsrc3)
        # NOTE(review): ucnt and dtype are computed but unused here
        ucnt = self.csv_reader.number_of_Uvalues_per_col(self.attr_type)
        dtype = str(attr.dtype)
        # checking/selecting radio button depending upon the type
        # determined for given cols datatype and unique counts
        # NOTE(review): ft from det_rb_type is immediately overwritten by
        # the stored attr.field_type — confirm which one is intended
        ft = self.det_rb_type(self.attr_type)
        ft = attr.field_type
        if ft:
            self.dict_rbs[ft].setChecked(True)
        else:
            print("WARNING :: No radio buttion selected for '%s'" %
                  self.attr_type)
            #FIXME - unchecking all radio buttons or suggest me what to do !!!!
            self.uncheck_all_radio_buttons()
        self.showCurrentAttributeDataInWidgets()

    def uncheck_all_radio_buttons(self):
        # Python 2 dict iteration (iteritems)
        for k, v in self.dict_rbs.iteritems():
            self.dict_rbs[k].setChecked(False)

    def show_range_widget(self):
        self.widRange.setHidden(False)
        self.horizontalLayout.addWidget(self.widRange)

    def show_codeset_widget(self):
        self.widCodeset.setHidden(False)
        self.horizontalLayout.addWidget(self.widCodeset)

    def show_unrepresentable_widget(self):
        self.widUnrepresentable.setHidden(False)
        self.horizontalLayout.addWidget(self.widUnrepresentable)

    def show_enumerated_widget(self):
        self.widEnumerated.setHidden(False)
        self.horizontalLayout.addWidget(self.widEnumerated)

    def handleXmlTemplate(self):
        """ Browse and select the input xml file """
        print('Xml Template')
        filenames = getFileFromDialog()
        self.teXmlTemplate.setText(filenames[0])

    def handleBrowseData(self):
        """ Browse and read the input csv file """
        # set old one to null
        self.csv_reader = None
        self.attr_type = None
        self.attrs = {}
        self.lstItems.clear()
        self.lstEnumUnique.clear()
        self.field_type = None
        filenames = getFileFromDialog()
        self.teBrowseData.setText(filenames[0])
        # creating csv reader
        self.csv_path = filenames[0]
        self.csv_reader = CsvReader(self.csv_path)
        # adding list items on left side
        for col in self.csv_reader.get_col_pd_names():
            self.lstItems.addItem(col)
        # select first row if there are elements in list
        if self.lstItems.count() > 0:
            self.lstItems.setCurrentRow(0)
            self.attr_type = self.lstItems.currentItem().text()
        self.refresh_enumerated_list_values()
        self.populate_attributes_data()
        self.showCurrentAttributeDataInWidgets()

    def det_rb_type(self, col):
        """ Function determines the radio button type depending upon the
        datatype and count of unique values in the given col in csv

        return: ENUMERATED, RANGE, CODESET, UNREPRESENTABLE

        Note: Function raise an exception if rb type can't be determined """
        ucnt = self.csv_reader.number_of_Uvalues_per_col(col)
        dtype = str(self.csv_reader.det_datatypes_col(col))
        # checking float64 columns for radio buttons
        if dtype == "float64":
            """ Unrepresentable if the unique values are less than 23 and
            the datatype is float64. """
            if ucnt < 23:
                return UNREPRESENTABLE
            """ Range radio button to be selected if there are 23-40 unique
            vales and the datatype is float64. """
            if ucnt >= 23 and ucnt <= 40:
                return RANGE
            """ Codeset radio button to be selected if there are more than
            40 unique values and the datatype is float64. """
            if ucnt > 40:
                return CODESET
            #FIXME
            # what to return in else case ?????
        # checking object columns for radio buttons
        if dtype == "object":
            """ Enumerated if the datatype is 'object' and there are less
            than 20 unique values. """
            if ucnt < 20:
                return ENUMERATED
        ################################################
        #                                              #
        # NOTE                                         #
        # Default is UNREPRESENTABLE type when we cant #
        # Determine radio button type as above         #
        #                                              #
        ################################################
        return UNREPRESENTABLE

    def populate_attributes_data(self):
        """ Function populates attributes info in data structure i.e. a
        python dict for AttributeInfo objects and their associated data """
        # clearing previous data from dict
        self.attrs = {}
        self.attr_type = None
        firstTime = True
        # create attribute info for each col in csv
        for col in self.csv_reader.get_col_pd_names():
            print("[INFO] populating data for: %s" % col)
            # creating attribute info
            ai = AttributeInfo(col)
            ai.field_type = self.det_rb_type(col)
            # ensure field type
            if not ai.field_type:
                print(
                    "[WARNING] :: cant populate attribute info for '%s', as field type is null"
                    % col)
                continue
                #FIXME or add a fallback field type UNREPRESENTABLE !
            ai.set_unique_count(self.csv_reader.number_of_Uvalues_per_col(col))
            ai.set_data_type(self.csv_reader.det_datatypes_col(col))
            ai.set_defn1("%s %s" % (ATTRIBUTE_DEFINITION, col))
            ai.set_dsrc1("%s %s" % (DATA_SOURCE, col))
            ai.rmin = self.csv_reader.get_col_min(col)
            ai.rmax = self.csv_reader.get_col_max(col)
            ai.en_unique_index = 0
            ai.en_unique_val = ai.rmin
            # check radio button and set val for range min/max textboxes for first attribute
            if firstTime:
                firstTime = False
                self.dict_rbs[ai.field_type].setChecked(True)
                self.teRangeMax.setText(str(ai.rmax))
                self.teRangeMin.setText(str(ai.rmin))
            # lets see the attribute info
            ai.show()
            # keeping attribute info in dict
            self.attrs[col] = ai

    def refresh_enumerated_list_values(self):
        """ Rebuild the enumerated-values list for the current attribute. """
        self.lstEnumUnique.clear()
        if not self.csv_reader:
            print("[WARNING] :: No csv loaded to read values")
            return
        if not self.attr_type:
            print(
                "[WARNING] :: refresh_enumerated_list_values() cant find current attribute"
            )
            return
        # adding unique values in enumerated widget's list widget
        for val in self.csv_reader.get_unique_values_col(self.attr_type):
            self.lstEnumUnique.addItem(str(val))
        if self.attrs:
            attr = self.attrs[self.attr_type]
            if attr.en_unique_val:
                self.lstEnumUnique.setCurrentRow(attr.en_unique_index)
            return
        # select first row if there are elements in list
        elif self.lstEnumUnique.count() > 0:
            self.lstEnumUnique.setCurrentRow(0)

    def handleSave(self):
        if not self.teXmlTemplate.toPlainText():
            print("[ERROR] :: XML output not specified")
            self.show_msgbox("Error", "Output Xml file not specified !")
            return
        ##########################################################
        #                                                        #
        # Create xml writer and pass ui to it , XML Writer will  #
        # fetch data from ui and save/update in the output xml   #
        #                                                        #
        ##########################################################
        # NOTE(review): teCitation is passed twice — confirm the second
        # argument should not be teOverview
        self.xml_writer = XmlWriter(self.teXmlTemplate.toPlainText())
        self.xml_writer.save(self.attrs, self.teCitation.toPlainText(),
                             self.teCitation.toPlainText())

    def handleSaveClose(self):
        self.handleSave()
        QApplication.quit()

    def handleCancel(self):
        QApplication.quit()

    def fieldTypeChanged(self):
        """ Show the dynamic widget matching the checked radio button and
        record the new field type on the current attribute. """
        self.widRange.setHidden(True)
        self.widCodeset.setHidden(True)
        self.widUnrepresentable.setHidden(True)
        self.widEnumerated.setHidden(True)
        # remove all widgets from horizontal layout
        for i in reversed(range(self.horizontalLayout.count())):
            self.horizontalLayout.removeWidget(
                self.horizontalLayout.itemAt(i).widget())
        if self.rbEnumerated.isChecked():
            self.field_type = ENUMERATED
            self.show_enumerated_widget()
        if self.rbRange.isChecked():
            self.field_type = RANGE
            self.show_range_widget()
        if self.rbCodeset.isChecked():
            self.field_type = CODESET
            self.show_codeset_widget()
        if self.rbUnrepresentable.isChecked():
            self.field_type = UNREPRESENTABLE
            self.show_unrepresentable_widget()
        ################################################
        #                                              #
        # Update field type of selected attribute when #
        # a radio button is changed                    #
        #                                              #
        ################################################
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] fieldTypeChanged() :: attributes dict is not initialized"
            )
            return
        # beacuse of async signal/slot for various ui elements
        # would be good to perform this check
        if not self.attr_type and self.lstItems.count() > 0:
            self.attr_type = self.lstItems.currentItem().text()
        self.printAttrs()
        self.attrs[self.attr_type].field_type = self.field_type
        self.showCurrentAttributeDataInWidgets()

    def printAttrs(self):
        """ convenience funciton to watch attributes """
        print("-------------[ Attributes ]-------------------")
        # Python 2 dict iteration (iteritems)
        for k, v in self.attrs.iteritems():
            print(k, v)
        print("----------------------------------------------")

    def showCurrentAttributeDataInWidgets(self):
        """ Function display the data of current selected attribute in the
        dynamic widgets on the right side """
        if not self.attr_type:
            return
        print("[showCurrentAttributeDataInWidgets]")
        attr = self.attrs[self.attr_type]
        # showing data in enumerated widget
        if attr.en_def:
            self.teEnumDef.setText(attr.en_def)
        else:
            self.teEnumDef.setText("")
        if attr.en_src:
            self.teEnumSrc.setText(attr.en_src)
        else:
            self.teEnumSrc.setText("")
        if attr.en_unique_val:
            self.lstEnumUnique.setCurrentRow(attr.en_unique_index)
        # showing data in range widget
        if attr.rmin:
            self.teRangeMin.setText(str(attr.rmin))
        else:
            self.teRangeMin.setText("")
        if attr.rmax:
            self.teRangeMax.setText(str(attr.rmax))
        else:
            self.teRangeMax.setText("")
        if attr.unit:
            self.teUnit.setText(str(attr.unit))
        else:
            self.teUnit.setText("")
        if attr.resolution:
            self.teResolution.setText(str(attr.resolution))
        else:
            self.teResolution.setText("")
        # showing data in codeset widget
        if attr.cs_name:
            self.teCodesetName.setText(str(attr.cs_name))
        else:
            self.teCodesetName.setText("")
        if attr.cs_src:
            self.teCodesetSource.setText(str(attr.cs_src))
        else:
            self.teCodesetSource.setText("")
        # showing data unrepresentable widget
        if attr.ur_desc:
            self.teUnreprDesc.setText(str(attr.ur_desc))
        else:
            self.teUnreprDesc.setText("")

    def handleAttributeDefChanged1(self):
        """ Function is called when ever data is changed in "attribute
        definition 1" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged1() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn1 = self.teAttributeDef1.toPlainText()

    def handleAttributeDefChanged2(self):
        """ Function is called when ever data is changed in "attribute
        definition 2" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged2() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn2 = self.teAttributeDef2.toPlainText()

    def handleAttributeDefChanged3(self):
        """ Function is called when ever data is changed in "attribute
        definition 3" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged3() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn3 = self.teAttributeDef3.toPlainText()

    def enumDefinitionChanged(self):
        """ Function is called when ever data is changed in enumerated
        definition """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumDefinitionChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[self.attr_type].en_def = self.teEnumDef.toPlainText()

    def enumSourceChanged(self):
        """ Function is called when ever data is changed in enumerated
        value """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumSourceChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[self.attr_type].en_src = self.teEnumSrc.toPlainText()

    def enumUniqueChanged(self):
        """ Function is called when ever data is changed in enumerated
        unique list """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumUniqueChanged() :: attributes dict is not initialized"
            )
            return
        if not self.lstEnumUnique.count():
            print("[WARNING] enumUniqueChanged() :: lst is empty")
            return
        if not self.lstEnumUnique.currentItem():
            print("[WARNING] enumUniqueChanged() :: lst has not item selected")
            return
        self.ensureAttributeType()
        # update enum unique value for current attribute
        self.attrs[
            self.attr_type].en_unique_index = self.lstEnumUnique.currentRow()
        self.attrs[
            self.attr_type].en_unique_val = self.lstEnumUnique.currentItem(
            ).text()

    def handleAttributeSourceChanged1(self):
        """ Function is called when ever data is changed in "attribute
        definition source 1" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged1() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc1 = self.teAttributeSource1.toPlainText()

    def handleAttributeSourceChanged2(self):
        """ Function is called when ever data is changed in "attribute
        definition source 2" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged2() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc2 = self.teAttributeSource2.toPlainText()

    def handleAttributeSourceChanged3(self):
        """ Function is called when ever data is changed in "attribute
        definition source 3" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged3() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc3 = self.teAttributeSource3.toPlainText()

    def handleRangeMinChanged(self):
        """ Function is called when ever range min is changed """
        print("[handleRangeMinChanged] range min: ",
              self.teRangeMin.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleRangeMinChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range min value for current attribute
        self.attrs[self.attr_type].rmin = self.teRangeMin.toPlainText()

    def handleRangeMaxChanged(self):
        """ Function is called when ever range max is changed """
        print("[handleRangeMaxChanged] range max: ",
              self.teRangeMax.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleRangeMaxChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range max value for current attribute
        self.attrs[self.attr_type].rmax = self.teRangeMax.toPlainText()

    def handleUnitChanged(self):
        """ Function is called when ever range unit is changed """
        print("[handleUnitChanged] range unit: ", self.teUnit.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleUnitChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range unit value for current attribute
        self.attrs[self.attr_type].unit = self.teUnit.toPlainText()

    def handleResolutionChanged(self):
        """ Function is called when ever range resolution is changed """
        print("[handleResolutionChanged] resolution: ",
              self.teResolution.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleResolutionChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range resolution value for current attribute
        self.attrs[self.attr_type].resolution = self.teResolution.toPlainText()

    def handleCodesetNameChanged(self):
        """ Function is called when ever codeset name is changed """
        print("[handleCodesetNameChanged] codeset name: ",
              self.teCodesetName.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleCodesetNameChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update codeset name
        self.attrs[self.attr_type].cs_name = self.teCodesetName.toPlainText()

    def handleCodesetSourceChanged(self):
        """ Function is called when ever codeset source is changed """
        # NOTE(review): logs teCodesetName, not teCodesetSource — confirm
        print("[handleCodesetSourceChanged] codeset source: ",
              self.teCodesetName.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleCodesetSourceChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update codeset source
        self.attrs[self.attr_type].cs_src = self.teCodesetSource.toPlainText()

    def handleUnreprensentableChanged(self):
        """ Function is called when ever UNREPRESENTABLE desc is changed """
        print("[handleUnreprensentableChanged] unrepresentable: ",
              self.teUnreprDesc.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleUnreprensentableChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update unrepresentable value for current attribute
        self.attrs[self.attr_type].ur_desc = self.teUnreprDesc.toPlainText()

    def show_msgbox(self, title, text):
        """ Function for showing error/info message box """
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Information)
        msg.setText(text)
        msg.setWindowTitle(title)
        msg.setStandardButtons(QMessageBox.Ok)
        retval = msg.exec_()
        print("[INFO] Value of pressed message box button:", retval)
def main():
    '''
    This is the entrypoint of the whole application. From the CMD, the user
    can specify either 'train' or 'test' after 'python app.py' to choose the
    mode which they would like to execute.
    '''
    # handle arguments from CMD
    argument_parser = argparse.ArgumentParser(
        description='Choose whether you want to train network or test network')
    argument_parser.add_argument('config',
                                 help='choose whether to train or test network')
    args = argument_parser.parse_args().config
    print('the inputted args is', args)

    # FIX: the original only rejected args when it was None, which argparse
    # never produces for a required positional.  Validate the value up front
    # so we do not load and preprocess the whole data set before discovering
    # an unsupported mode.
    if args not in ('train', 'test'):
        print('This configuration is unsupported. Please enter [train] or [test]')
        return

    # file paths for training data
    faces_file_path = 'face-images-with-marked-landmark-points/face_images.npz'
    csv_file_path = 'face-images-with-marked-landmark-points/facial_keypoints.csv'

    # helper classes
    image_reader = ImageReader(faces_file_path)
    csv_reader = CsvReader(csv_file_path)

    image_data = image_reader.get_array()
    image_data = image_data['face_images']
    # move the last axis of the stored array to the front so iteration is
    # per-image
    image_data = np.moveaxis(image_data, -1, 0)
    # Must map all the images into 3-channels
    image_data = map_images_to_3_channels(image_data)

    # this is a dataframe of facial keypoints
    df = csv_reader.get_facial_features()

    # This gets all the coordinates from the dataframe
    coordinates_list = extract_coordinates_list_unzipped(df)
    coordinates_list = np.array(coordinates_list)
    coordinates_list = scale_coordinates(coordinates_list)

    # validation to ensure data is clean
    validate_data(image_data, coordinates_list)

    # extracting and separating data as appropriate
    X_train, y_train, X_test, y_test = split(image_data, coordinates_list)

    if args == 'train':
        print('[train] configuration selected. Training.')
        # augmented training data saved by the generation script overrides
        # the freshly split X_train/y_train
        loader = Loader()
        X_train = loader.load_from_filepath('images.npy')
        y_train = loader.load_from_filepath('coordinates_list.npy')
        print('images size', len(X_train))
        print('coordinates size', len(y_train))
        assert len(X_train) == len(y_train)
        train(get_custom_model(), X_train, y_train, X_test, y_test)
    else:  # args == 'test' (validated above)
        print('[test] configuration selected. Testing.')
        model = get_trained_model('modelMKIII.h5')
        prediction = predict_images(model, X_test)
        assert len(X_test) == len(prediction)
        draw_images(X_test, prediction)
# --- feature-set construction for the classifier (Python 2 script) ---
# NOTE(review): this chunk starts mid-script; unigram_capacity,
# bigram_capacity and the read_*_for_feature helpers are defined above
# this excerpt -- confirm against the full file.
use_gender = True

# capacity-limited n-gram feature lists
unigrams = read_unigrams_for_feature(unigram_capacity)
print ">>", len(unigrams), 'unigram features:', unigrams
bigrams = read_bigrams_for_feature(bigram_capacity)
print ">>", len(bigrams), 'bigram features:', bigrams

# capacity 0 presumably means "no limit", yielding the full bag
# (sizes noted by the original author: 84 words, 5 bigrams) -- TODO confirm
bag_of_words = read_unigrams_for_feature(0)  # 84
print ">> a bag of words of size ", len(bag_of_words), ":", bag_of_words
bag_of_bigrams = read_bigrams_for_feature(0)  # 5
print ">> a bag of bigrams of size ", len(
    bag_of_bigrams), ":", bag_of_bigrams

print "Training data loading..."
training_data = CsvReader(constants.TRAINING_DATA_FILE)
print "Training data loaded"

print "Creating the topic model..."
topic_model_feature = TopicModelFeature()
#topic_model_feature = None
print "Creation of the topic model done"

# first argument 0 selects which trait/label to process -- TODO confirm
# against process_training_data's signature
all_features, neu_labels = process_training_data(0, training_data, unigrams,
                                                 bigrams, bag_of_words,
                                                 bag_of_bigrams,
                                                 topic_model_feature)

print "Testing data loading..."
# second argument appears to flag the reader as test data -- assumption,
# verify against CsvReader
test_data = CsvReader(constants.TEST_DATA_FILE, True)
import sys import constants from csv_reader import CsvReader if __name__ == "__main__": weka_output = sys.argv[1] neu_indices = list() for line in open(weka_output, 'r'): row = line.split() row = [r.strip() for r in row] if float(row[2]) >= 28: neu_indices.append(int(row[0]) - 1) # Weka indices start from 1 print neu_indices test_data = CsvReader(constants.TEST_DATA_FILE, True) author_ids_test = test_data.get_author_ids() print len(neu_indices) print "class,predictions" print "+, ", for i in range(0, len(neu_indices)): print author_ids_test[neu_indices[i]],
def test_read_data_from_csv_reader(self):
    """CsvReader.read() returns the fixture rows as a list of dicts."""
    expected = [{'key_1': 'value_1', 'key_2': 'value_2'}]
    self.assertEqual(CsvReader('test/test.csv').read(), expected)
def get_feature_by_punctuation(self, punc):
    """Count occurrences of one punctuation character per author.

    :param punc: single character to count
    :return: list with one total per author, in the order returned by
             self._data_set.init_status_group_by_author()
    """
    aut_statuses = self._data_set.init_status_group_by_author()
    # str.count replaces the original hand-rolled char-by-char nested loop;
    # same totals, one C-level pass per status
    return [sum(status.count(punc) for status in statuses)
            for statuses in aut_statuses]


if __name__ == "__main__":
    # smoke test: load the training set and pull author ids / NEU labels
    # (Python 2 script; the large block of commented-out exploratory probes
    # that followed has been removed)
    training_data = CsvReader(constants.TRAINING_DATA_FILE)
    fr = WordFeature(training_data)
    author_ids, neus = training_data.get_author_ids_and_neu_labels()
def import_gdit_genes(db):
    """Load the GDIT (Lek) gene table into db.hw.gdit_genes, indexed by gene."""
    reader = CsvReader(GENE_DISCOVERY_INFORMATION_TOOLKIT_LEK)
    reader.import_to_db(db.hw, 'gdit_genes')
    db.hw.gdit_genes.create_index([('gene', pymongo.ASCENDING)], name='gene_1')
import sys
sys.path.insert(0, '../library')
from csv_reader import CsvReader

# Grade one assignment question via the shared CsvReader helper.
task = "hw1"
# replace with absolute address of assignments folder
base_dir = "../hw1"
grade_suffix = "q1"  # question identifier within the assignment
scale = 10  # grading scale passed to read() -- presumably max points, TODO confirm

new_task = CsvReader(task, base_dir)
new_task.read(grade_suffix, scale)
# -*- coding: cp936 -*- """ 生成各年的联合专利邻接矩阵 """ import os import sys from csv_reader import CsvReader from graph_maker import GraphMaker default_encoding = "utf-8" if default_encoding != sys.getdefaultencoding(): reload(sys) sys.setdefaultencoding(default_encoding) YEAR = ['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016'] INPUT_CSV = u'2007-2016医药卫生行业数据/2007-2016全国医药卫生联合专利.csv' PREFIX_LABEL = 'AM_医药卫生' if __name__ == '__main__': assert (os.path.exists(INPUT_CSV)), 'Such file \"' + INPUT_CSV + '\" does not exists!' for y in YEAR: y = str(y) my_reader = CsvReader(INPUT_CSV, start_time=y + '-01-01', end_time=y + '-12-31') print u'生成' + y + u'年邻接矩阵' my_graph_maker = GraphMaker(my_reader.joint_applicant_list()) my_graph_maker.write_adjacent_matrix(PREFIX_LABEL + '_' + y + '.csv')
import sys import constants from csv_reader import CsvReader if __name__ == "__main__": weka_output = sys.argv[1] neu_indices = list() for line in open(weka_output, 'r'): row = line.split() row = [r.strip() for r in row] if float(row[2]) >= 28: neu_indices.append(int(row[0]) - 1) # Weka indices start from 1 print neu_indices test_data = CsvReader(constants.TEST_DATA_FILE, True) author_ids_test = test_data.get_author_ids() print len(neu_indices) print "class,predictions" print "+, ", for i in range(0, len(neu_indices)): print author_ids_test[neu_indices[i]],
from csv_reader import CsvReader
from DataScientistMethod import train_split_data
from model import IrisModel

# load data
data = CsvReader.create_from_csv(
    "/Users/rklis/Projects/PSD_training/projectSprint/iris.csv", sep=";")

# data preparation: hold out 30% of rows, stratified on the 'iris_type' label
test_data, to_learn = train_split_data(data, 'iris_type', 0.3)

irisModel = IrisModel(test_data, to_learn, data.get_types_of_data('iris_type'))
irisModel.learn()

# Run the model over every held-out row.  The original loop stashed each
# prediction in a throwaway local `a` and set `b = 1` (debugger breakpoint
# filler); both dead locals and the commented-out os.path probe are removed.
for item in test_data:
    irisModel.predict(item)
# Chart-drawing playground: each import below has a matching (mostly
# commented-out) demo call; uncomment the one you want to render.
from multipline import multiline
from bar import bar
from piechart import PieChart
from stackplot import StackPlot
from lineplot import LinePlot
from histogram import Histogram
from scatter import Scatter

# multiline()
# bar()

from csv_reader import CsvReader
from random import randint

# movie statistics source
csv_reader = CsvReader('movies.csv')
csv_reader.read_csv()
# csv_reader.draw_films_by_years()
# csv_reader.draw_films_by_profits()

pie_chart = PieChart()
pie_chart.draw_pie_chart()

# stack_plot = StackPlot()
# stack_plot.draw_stack_plot()

# line_plot = LinePlot()
# line_plot.draw_line_plot()

# histogram_plot = Histogram()
# histogram_plot.draw_histogram()

# NOTE(review): constructed but all of its draw calls are commented out
scatter_plot = Scatter()
# scatter_plot.draw_scatter()
# scatter_plot.create_fake_csv()
# scatter_plot.draw_scatter_from_csv()
# Parse an Alipay transaction-export CSV into AliCsvRowData records.
# (Python 2 script: uses `unicode` and byte-string .decode('utf8').)
from csv_entity.ali_csv import AliCsvRowData
from csv_reader import CsvReader

# NOTE(review): `logging` is used below but its import is not visible in
# this excerpt -- presumably imported above; confirm against the full file.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

csv_path = 'alipay/test.csv'

# if os.path.isfile(csv_path):
#     with open(csv_path, "r") as csvfile:
#         reader = csv.reader(csvfile)
#         # readlines is not needed here (translated from Chinese)
#         for line in reader:
#             print line

logger.debug("---")
reader = CsvReader(csv_path)
data_rows = reader.reader_csv()

data_list = []
for row in data_rows:
    # skip the header row; u'交易号' is the "transaction id" column title
    if unicode.strip(row[0].decode('utf8')) == u'交易号':
        continue
    else:
        # one record per CSV row; the export has 16 columns
        data = AliCsvRowData(row[0], row[1], row[2], row[3], row[4], row[5],
                             row[6], row[7], row[8], row[9], row[10], row[11],
                             row[12], row[13], row[14], row[15])
        logger.debug(data)
        data_list.append(data)

logger.debug(len(data_list))
import numpy as np  # FIX: `np` was used below but never imported in this view

from csv_reader import CsvReader
from img_reader import ImageReader

'''
This is the entrypoint for data generation. It extracts data from given
files of facial images and corresponding feature coordinates, generating
and saving data as *.npy files after performing data augmentation.
'''

# NOTE(review): map_images_to_3_channels, extract_coordinates_list_unzipped
# and scale_coordinates_list are also unresolved in this excerpt -- confirm
# they are imported from a helpers module in the full file.

# file paths for training data
faces_file_path = 'face-images-with-marked-landmark-points/face_images.npz'
csv_file_path = 'face-images-with-marked-landmark-points/facial_keypoints.csv'

# helper classes
image_reader = ImageReader(faces_file_path)
csv_reader = CsvReader(csv_file_path)

image_data = image_reader.get_array()
image_data = image_data['face_images']
# move the last axis of the stored array to the front so iteration is per-image
image_data = np.moveaxis(image_data, -1, 0)
# Must map all the images into 3-channels
image_data = map_images_to_3_channels(image_data)

# this is a dataframe of facial keypoints
df = csv_reader.get_facial_features()

# this gets all the coordinates from the dataframe, scaled for training
coordinates_list = extract_coordinates_list_unzipped(df)
coordinates_list = np.array(coordinates_list)
coordinates_list = scale_coordinates_list(coordinates_list)