def handleBrowseData(self):
    """ Browse and read the input csv file.

    Resets all attribute state, asks the user for a csv file, then rebuilds
    the attribute list, the attribute info dict and the right-side widgets.
    """
    # set old one to null
    self.csv_reader = None
    self.attr_type = None
    self.attrs = {}
    self.lstItems.clear()
    self.lstEnumUnique.clear()
    self.field_type = None
    filenames = getFileFromDialog()
    # bug fix: the user may cancel the dialog; indexing an empty selection
    # raised IndexError.  Keep the freshly-cleared state and bail out.
    if not filenames:
        return
    self.teBrowseData.setText(filenames[0])
    # creating csv reader
    self.csv_path = filenames[0]
    self.csv_reader = CsvReader(self.csv_path)
    # adding list items on left side
    for col in self.csv_reader.get_col_pd_names():
        self.lstItems.addItem(col)
    # select first row if there are elements in list
    if self.lstItems.count() > 0:
        self.lstItems.setCurrentRow(0)
        self.attr_type = self.lstItems.currentItem().text()
    self.refresh_enumerated_list_values()
    self.populate_attributes_data()
    self.showCurrentAttributeDataInWidgets()
def main():
    """Load 'Monitoring report.csv' and store its rows in a SQLite
    'monitoring' table, aborting early on any setup failure."""
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    db = Store()
    reader = CsvReader()
    data = reader.get_data_from_csv('Monitoring report.csv')
    if data is None:
        # bug fix: the bare name `exit` (not called) was a no-op, so
        # execution used to fall through on every error path
        return
    conn = db.create_connection(db_file='monitoring-report.db')
    if conn is None:
        return
    # Create the monitoring table
    sql_create_table = "CREATE TABLE IF NOT EXISTS monitoring(date, energy, reactive_energy, power, maximeter, reactive_power, voltage, intensity, power_factor)"
    if db.create_table(conn, sql_create_table) is None:
        return
    values = [row for index, row in data.iterrows()]
    db.insert_data(conn, values)
    conn.close()
def import_seg_dups(db):
    """Rebuild hw.seg_dups from the UCSC segmental duplications TSV and
    index it on chrom, chromStart, chromEnd, xstart and xend."""
    db.hw.seg_dups.drop()
    segmental_dup = CsvReader(USCS_SEGMENTAL_DUP_TSV, delimiter='\t')
    kept = []
    for doc in segmental_dup.data:
        # strip the leading 'chr' prefix from both chromosome columns
        doc['chrom'] = doc['chrom'][3:]
        doc['otherChrom'] = doc['otherChrom'][3:]
        # keep only primary chromosomes (stripped name shorter than 3 chars)
        if len(doc['chrom']) >= 3:
            continue
        doc['xstart'] = get_xpos(doc['chrom'], doc['chromStart'])
        doc['xend'] = get_xpos(doc['chrom'], doc['chromEnd'])
        kept.append(doc)
    segmental_dup.data = kept
    segmental_dup.import_to_db(db.hw, 'seg_dups')
    for field in ('chrom', 'chromStart', 'chromEnd', 'xstart', 'xend'):
        db.hw.seg_dups.create_index([(field, pymongo.ASCENDING)],
                                    name=field + '_1')
def load(self, path):
    """Read a PLY file at *path* and cache its geometry.

    Stores vertices and triangles as numpy arrays plus their counts.
    """
    ply_reader = CsvReader(path)
    vertices, triangles = ply_reader.read_ply()
    self.list_vertex = np.array(vertices)
    self.list_triangles = np.array(triangles)
    self.num_vertex = len(self.list_vertex)
    self.num_triangles = len(self.list_triangles)
def __init__(self, path=None):
    """Load the DOI reference table and index its records.

    Builds two lookup maps over the parsed rows: one keyed by 'lastmod'
    and one keyed by 'pid'.  Falls back to the default table path when
    *path* is not given.
    """
    if path is None:
        path = "/Users/ostwald/devel/opensky/pubs_to_grants/ARTICLES_award_id_data/DOI_Reference_TABLE.csv"
    self.last_mod_map = {}
    self.pid_map = {}
    CsvReader.__init__(self, path)
    # later records with the same key overwrite earlier ones, as before
    self.last_mod_map.update((rec['lastmod'], rec) for rec in self.data)
    self.pid_map.update((rec['pid'], rec) for rec in self.data)
def run():
    """Render one award certificate image per CSV row and bundle the images
    into PDFs of at most `images_per_pdf` pages each."""
    config = utils.load_cfg('conf.cfg')
    template_path = config.get('image', 'filename')
    markers = {'school': config.getint('markers', 'school'),
               'year': config.getint('markers', 'year'),
               'level': config.getint('markers', 'level'),
               'award': config.getint('markers', 'award'),
               'recipient': config.getint('markers', 'recipient')}
    font = {'color': config.get('font', 'color'),
            'name': config.get('font', 'name'),
            'size': config.getint('font', 'size')}
    images_per_pdf = config.getint('pdf', 'images_per_pdf')
    if images_per_pdf > 6:
        # 6 is the maximum allowed number of images per pdf
        exit()
    csv_file = config.get('csv', 'filename')
    cc = CsvReader(csv_file)
    csv_data = cc.read()
    school = 'John Scottus School'
    year = '2016'
    image_folder = utils.create_folder('images')
    pdf_folder = utils.create_folder('pdfs')
    count = 0
    images = []
    draw_tool = ImageWriter(font)
    for row in csv_data:
        im = draw_tool.open_image(template_path)
        im = draw_tool.write_text(im, markers['school'], school)
        im = draw_tool.write_text(im, markers['year'], year)
        im = draw_tool.write_text(im, markers['level'], row['Level'])
        im = draw_tool.write_text(im, markers['award'], row['Award'])
        im = draw_tool.write_text(im, markers['recipient'], row['Recipient'])
        im_path = image_folder + '/' + 'image_' + utils.timestamp() + '.png'
        draw_tool.save_image(im, im_path)
        count += 1
        images.append(im_path)
        if count % images_per_pdf == 0:
            create_pdf(pdf_folder, images)
            count = 0
            images = []
    # bug fix: only flush a non-empty remainder; previously an empty PDF was
    # requested whenever the row count was an exact multiple of images_per_pdf
    if images:
        create_pdf(pdf_folder, images)
def __init__(self, path, filter_args={}, sort_args={}): self.sort_spec = SortSpec(**sort_args) self.filter_spec = FilterSpec(**filter_args) CsvReader.__init__(self, path) print '{} records before filtering'.format(len(self.data)) # print 'filter spec: {}'.format(self.filter_spec.spec) if filter_args is not None: self.filter_data() if self.sort_spec is not None: self.data.sort(key=lambda x: x[self.sort_spec.spec['field']]) if self.sort_spec.spec['reverse']: self.data.reverse()
def import_csv_data(self, filename: str) -> None:
    """Load strap rows from *filename* and add each one to the table.

    Each row is expected to carry name, quantity, colour and company code
    in its first four columns.
    """
    reader = CsvReader(filename)
    for row in reader.get_data():
        strap = Strap(row[0], row[1], row[2], row[3])
        self._add_strap_to_table(strap)
def get_org_units(self):
    """Build OrgUnit objects from the configured csv file and return the
    accumulated OrgUnit.organization_units as a JSON array string.

    Note: appends to the class-level OrgUnit.organization_units list, so
    repeated calls keep accumulating (pre-existing behavior).
    """
    with open(OrgUnit.config['csv_file'], 'r') as csvFile:
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        # presumably maps column index -> attribute name; verify against
        # CsvReader.read_csv_data
        data = CsvReader.read_csv_data()
        for row in file_reader:
            # bug fix: build a fresh dict per row — the old single shared
            # dict leaked/overwrote values between rows
            attributes = {}
            for index in data:
                attributes[data[index]] = row[index]
            org_unit = OrgUnit(attributes)
            OrgUnit.organization_units.append(org_unit.__dict__)
    # json.dumps replaces the error-prone manual '[' + ... + ']' string
    # concatenation; output is equivalent JSON
    return json.dumps(OrgUnit.organization_units)
def get_org_groups(self):
    """Return the distinct org unit groups found in the configured csv file,
    serialized as a JSON array of OrgUnitGroup dicts."""
    with open(CsvReader.config['csv_file'], 'r') as csvFile:
        data = []      # group identifiers already seen (de-duplication)
        data_vol = []  # OrgUnitGroup.__dict__ per distinct identifier
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        code_key = CsvReader.get_key_code('code')
        #name_key = CsvReader.get_key_code('name')
        # column index of the group identifier: taken from the first value of
        # the configured 'org_groups' mapping (Python 2 dict iteration —
        # order depends on the dict, TODO confirm a single-entry config)
        identifier_key = CsvReader.get_key_code(Configuration.config['org_groups'].itervalues().next())
        for row in file_reader:
            if row[identifier_key] not in data:
                data.append(row[identifier_key])
                orgGroup = OrgUnitGroup(row[code_key],row[identifier_key])
                data_vol.append(orgGroup.__dict__)
        return json.dumps(data_vol)
def write_doi_listing(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) fp = open(out_path, 'w') for rec in reader.data: fp.write(rec['doi'] + '\n') fp.close() print 'wrote {} dois to {}'.format(len(reader.data), os.path.basename(out_path))
def import_mouse_het_lethal_knockout_genes(db):
    """Rebuild db.gevir.mouse_het_lethal with two documents: the human gene
    names and canonical transcript ids whose mouse homolog knockouts are
    annotated in the het-lethal TSV."""
    # mouse gene symbol -> human gene symbol lookup
    mouse_to_human_gene_names = {}
    mouse_to_human = CsvReader(MOUSE_TO_HUMAN_TSV, delimiter='\t')
    for document in mouse_to_human.data:
        mouse_to_human_gene_names[
            document['mouse_gene_name']] = document['human_gene_name']
    gene_names_het_lethal = set([])
    gene_transcripts_het_lethal = set([])
    mouse_abnormal_survival = CsvReader(MOUSE_HET_LETHAL_TSV, delimiter='\t')
    for document in mouse_abnormal_survival.data:
        mouse_gene_name = document['Ontology Annotation Subject . Symbol']
        # NOTE(review): zygosity and phenotype are read but never used —
        # no filtering on them happens here
        mouse_zygosity = document['Subject Zygosity']
        mouse_phenotype = document['Ontology Annotation Term Name']
        # Ignore genes for which there are no human homologs
        if mouse_gene_name not in mouse_to_human_gene_names:
            continue
        human_gene_name = mouse_to_human_gene_names[mouse_gene_name]
        transcript_id = get_gene_canonical_transcript_by_name(
            db, human_gene_name)
        # Ignore genes which names were not found in ExAC
        if not transcript_id:
            continue
        exac_gene = db.exac.genes.find_one(
            {'canonical_transcript': transcript_id})
        gene_name = exac_gene['gene_name']
        gene_names_het_lethal.add(gene_name)
        gene_transcripts_het_lethal.add(transcript_id)
    db.gevir.mouse_het_lethal.drop()
    db.gevir.mouse_het_lethal.insert({
        '_id': 'gene_names',
        'data': list(gene_names_het_lethal)
    })
    db.gevir.mouse_het_lethal.insert({
        '_id': 'transcript_ids',
        'data': list(gene_transcripts_het_lethal)
    })
def write_pid2doi_json(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) json_data = {} for rec in reader.data: json_data[rec['pid']] = rec['doi'] fp = open(out_path, 'w') fp.write(json.dumps(json_data, indent=4)) fp.close() print 'wrote {} mappings to {}'.format(len(reader.data), os.path.basename(out_path))
def __init__(self, code, name, orgUnits=None):
    """Org unit group identified by *code*/*name*.

    Scans the configured csv file and collects the 'code' cell of every
    row that mentions *name*, storing the result in organisationUnits.
    """
    self.code = code
    self.name = name
    self.shortName = name
    matches = []
    with open(CsvReader.config['csv_file'], 'r') as csv_source:
        for row in csv.reader(csv_source):
            if self.name in row:
                matches.append({'code': row[CsvReader.get_key_code('code')]})
    self.organisationUnits = matches
def __init__(self, code, name, organisationUnitGroups=None):
    """Org unit group set built from the configured csv file: collects the
    'code' cell of each row with a not-yet-seen group identifier."""
    self.code = code
    self.name = name
    with open(Configuration.config['csv_file'], 'r') as csvFile:
        data = []      # identifiers already seen (de-duplication)
        data_vol = []  # {'code': ...} per accepted row
        file_reader = csv.reader(csvFile)
        file_reader.next()  # skip the header row (Python 2 iterator API)
        code_key = CsvReader.get_key_code('code')
        #name_key = CsvReader.get_key_code('name')
        # column indices come from the first values of the configured
        # mappings (Python 2 dict iteration)
        group_identifier_key = CsvReader.get_key_code(
            Configuration.config['org_groups'].itervalues().next())
        group_set_identifier_key = CsvReader.get_key_code(
            Configuration.config['org_group_sets'].itervalues().next())
        for row in file_reader:
            # NOTE(review): both guards test membership in the SAME `data`
            # list, yet only the group identifier is ever appended to it —
            # confirm the second check is intentional and not a copy/paste
            # slip for a separate seen-set of group-set identifiers
            if row[group_identifier_key] not in data:
                if row[group_set_identifier_key] not in data:
                    data.append(row[group_identifier_key])
                    data_vol.append({'code': row[code_key]})
        self.organisationUnitGroups = data_vol
def import_tandem_repeats(db):
    """Rebuild hw.tandem_repeats: one document (_id = xpos) per genomic
    position covered by any UCSC tandem repeat, bulk-inserted in batches."""
    db.hw.tandem_repeats.drop()
    tandem_repeats = CsvReader(USCS_TANDEM_REPEATS_TSV, delimiter='\t')
    pos_repeats = set([])
    total_lines = len(tandem_repeats.data)
    line_number = 0
    bar = progressbar.ProgressBar(maxval=1.0).start()
    for document in tandem_repeats.data:
        # added for patch chroms like chr19_gl000209_random
        if len(document['chrom']) > 5:
            continue
        chrom = document['chrom'][3:]
        start = document['chromStart']
        end = document['chromEnd']
        xstart = get_xpos(chrom, start)
        xend = get_xpos(chrom, end)
        for xpos in range(xstart, xend + 1):
            pos_repeats.add(xpos)
        line_number += 1
        bar.update((line_number + 0.0) / total_lines)
    bar.finish()
    total_lines = len(pos_repeats)
    line_number = 0
    bar = progressbar.ProgressBar(maxval=1.0).start()
    bulk = db.hw.tandem_repeats.initialize_unordered_bulk_op()
    counter = 0
    for xpos in pos_repeats:
        bulk.insert({'_id': xpos})
        counter += 1
        if counter % 500 == 0:
            bulk.execute()
            # bug fix: recreate an *unordered* bulk op — the original
            # switched to initialize_ordered_bulk_op after the first batch,
            # silently changing the insert semantics mid-run
            bulk = db.hw.tandem_repeats.initialize_unordered_bulk_op()
        line_number += 1
        bar.update((line_number + 0.0) / total_lines)
    # flush the final partial batch (a full batch was already executed above)
    if counter % 500 != 0:
        bulk.execute()
    bar.finish()
def read_clin_var_cites():
    """Read the ClinVar citations TSV and return a mapping of
    allele id -> set of citation ids."""
    clin_vars = CsvReader(CLIN_VAR_CITES_TSV, delimiter='\t')
    total = len(clin_vars.data)
    bar = progressbar.ProgressBar(maxval=1.0).start()
    cites = {}
    for row_num, document in enumerate(clin_vars.data, start=1):
        # group citation ids under their allele id
        cites.setdefault(document['#AlleleID'], set()).add(
            document['citation_id'])
        bar.update((row_num + 0.0) / total)
    bar.finish()
    return cites
def write_doi2date_json(csv_path, out_path): reader = CsvReader(csv_path) print '{} records'.format(len(reader.data)) json_data = {} in_fmt = '%m/%d/%y' out_fmt = '%Y-%m-%d' for rec in reader.data: pubdate = rec['pub_date'] try: date_val = time.strftime(out_fmt, time.strptime(pubdate, in_fmt)) except: print "ERROR: could not parse {}: {}".format( pubdate, sys.exc_info()[1]) date_val = '' json_data[rec['doi']] = date_val fp = open(out_path, 'w') fp.write(json.dumps(json_data, indent=4)) fp.close() print 'wrote {} mappings to {}'.format(len(reader.data), os.path.basename(out_path))
def import_uneecon_g(db):
    """Load UNEECON-G gene scores into db.gevir.uneecon_genes and index the
    collection on gene_id, gene_name and uneecon_g."""
    uneecon_g_genes = []
    uneecon_g = CsvReader(UNEECON_G_TSV, delimiter='\t')
    for document in uneecon_g.data:
        # '#gene' holds 'ENSG....version|symbol'
        gene_info = document['#gene']
        gene_id, gene_name = gene_info.split('|')
        gene_id = gene_id.split('.')[0]  # drop the Ensembl version suffix
        uneecon_score = float(document['UNEECON-G'])
        exac_gene = db.exac.genes.find_one({'gene_id': gene_id})
        # bug fix: skip genes unknown to ExAC — find_one returns None and
        # subscripting it raised TypeError (sibling importers guard the
        # same lookup pattern)
        if not exac_gene:
            continue
        transcript_id = exac_gene['canonical_transcript']
        uneecon_g_gene = Uneecon_G(transcript_id, gene_id, gene_name,
                                   uneecon_score)
        uneecon_g_genes.append(uneecon_g_gene.get_dictionary())
    db.gevir.uneecon_genes.drop()
    db.gevir.uneecon_genes.insert_many(uneecon_g_genes)
    db.gevir.uneecon_genes.create_index([('gene_id', pymongo.ASCENDING)],
                                        name='gene_id_1')
    db.gevir.uneecon_genes.create_index([('gene_name', pymongo.ASCENDING)],
                                        name='gene_name_1')
    db.gevir.uneecon_genes.create_index([('uneecon_g', pymongo.ASCENDING)],
                                        name='uneecon_g_1')
class Window(QMainWindow):
    """ Main GUI class for application.

    Loads the Qt UI from app.ui, wires every widget signal to a handler,
    and keeps per-column AttributeInfo state in `attrs`, keyed by the csv
    column name currently selected in the left-hand list.
    """
    # path of the csv file currently loaded (None until Browse is used)
    csv_path = None
    #
    # dict for storing attibutes data
    # as associative dict of col/attr name and AttributeInfo
    #
    attrs = {}

    def __init__(self):
        QWidget.__init__(self)
        # loaind ui from xml
        uic.loadUi(os.path.join(DIRPATH, 'app.ui'), self)
        # button event handlers
        self.btnXmlTemplate.clicked.connect(self.handleXmlTemplate)
        self.btnBrowseData.clicked.connect(self.handleBrowseData)
        self.btnSave.clicked.connect(self.handleSave)
        self.btnSaveClose.clicked.connect(self.handleSaveClose)
        self.btnCancel.clicked.connect(self.handleCancel)
        # attribute definition and definition source change event handlers
        self.teAttributeDef1.textChanged.connect(
            self.handleAttributeDefChanged1)
        self.teAttributeSource1.textChanged.connect(
            self.handleAttributeSourceChanged1)
        self.teAttributeDef2.textChanged.connect(
            self.handleAttributeDefChanged2)
        self.teAttributeSource2.textChanged.connect(
            self.handleAttributeSourceChanged2)
        self.teAttributeDef3.textChanged.connect(
            self.handleAttributeDefChanged3)
        self.teAttributeSource3.textChanged.connect(
            self.handleAttributeSourceChanged3)
        # ENUMERATED data change event handlers
        self.teEnumDef.textChanged.connect(self.enumDefinitionChanged)
        self.teEnumSrc.textChanged.connect(self.enumSourceChanged)
        self.lstEnumUnique.currentItemChanged.connect(self.enumUniqueChanged)
        # RANGE min, max, unit, resolution data change event handlers
        self.teRangeMin.textChanged.connect(self.handleRangeMinChanged)
        self.teRangeMax.textChanged.connect(self.handleRangeMaxChanged)
        self.teUnit.textChanged.connect(self.handleUnitChanged)
        self.teResolution.textChanged.connect(self.handleResolutionChanged)
        # CODESET name and source change event handler
        self.teCodesetName.textChanged.connect(self.handleCodesetNameChanged)
        self.teCodesetSource.textChanged.connect(
            self.handleCodesetSourceChanged)
        # UNREPRESENTABLE desc filed change event handler
        self.teUnreprDesc.textChanged.connect(
            self.handleUnreprensentableChanged)
        # radio buttons signal handlers
        self.rbEnumerated.toggled.connect(self.fieldTypeChanged)
        self.rbRange.toggled.connect(self.fieldTypeChanged)
        self.rbCodeset.toggled.connect(self.fieldTypeChanged)
        self.rbUnrepresentable.toggled.connect(self.fieldTypeChanged)
        # convenience dict of radio button names and objects
        self.dict_rbs = {
            ENUMERATED: self.rbEnumerated,
            RANGE: self.rbRange,
            CODESET: self.rbCodeset,
            UNREPRESENTABLE: self.rbUnrepresentable
        }
        # adding listener for left side list's item changed
        self.lstItems.currentItemChanged.connect(self.attributeChanged)
        # gui setup
        self.setupGui()

    def setupGui(self):
        """ Initialize gui and defualt values etc """
        # default initialization of GUI
        self.lblMeasurementDesc.setText(TXT_MEASUREMENT_DESC)
        # setting desc for domain detail labels for dunamic widgets
        self.lblEnumeratedDomain.setText(TXT_ENUMERATED_DOMAIN)
        self.lblRangeDomain.setText(TXT_RANGE_DOMAIN)
        self.lblCodesetDomain.setText(TXT_CODESET_DOMAIN)
        self.lblUnrepresentableDomain.setText(TXT_UNPRESENTABLE_DOMAIN)
        # default field type select in ui is 'enumerated'
        self.field_type = ENUMERATED
        # default field type is 'enumerated'
        self.rbEnumerated.setChecked(True)
        # overview text
        self.teOverview.setText(TXT_OVERVIEW)
        # citation text
        self.teCitation.setText(TXT_CITATION)

    def ensureAttributeType(self):
        """ Keep self.attr_type in sync with the left-hand list selection,
        selecting the first row when nothing is selected. """
        if not self.lstItems.count():
            self.attr_type = None
            return
        if not self.lstItems.currentItem():
            self.lstItems.setCurrentRow(0)
        self.attr_type = self.lstItems.currentItem().text()

    def attributeChanged(self):
        """ Function dynamically shows widgets on right side of tab 2
        Whenever an item is selected/changed on lest side list.
        Radio buttons on right side are automatically selected, depending
        upon the values of attribute's column in csv file """
        self.ensureAttributeType()
        if not self.attr_type:
            print("[WARNING] attributeChanged() :: not attr type !!!")
            return
        self.refresh_enumerated_list_values()
        ################################################
        #                                              #
        # Enable/disable radio buttons on right side   #
        # depending upon the data of select attribute  #
        # associated with a column in csv file         #
        #                                              #
        ################################################
        if not self.csv_reader:
            print("[WARNING] :: No csv loaded to read values")
            return
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] attributeChanged() :: attributes dict is not initialized"
            )
            # first time - default vals
            self.teAttributeDef1.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource1.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            self.teAttributeDef2.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource2.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            self.teAttributeDef3.setText(
                "%s %s" % (ATTRIBUTE_DEFINITION, self.attr_type))
            self.teAttributeSource3.setText("%s %s" %
                                            (DATA_SOURCE, self.attr_type))
            return
        ####################################################
        #                                                  #
        # Fill UI with information from selected attribute #
        #                                                  #
        ####################################################
        attr = self.attrs[self.attr_type]
        # update text fields for attribute definition and definition source
        # from values in the current attribute
        self.teAttributeDef1.setText(attr.defn1)
        self.teAttributeSource1.setText(attr.dsrc1)
        self.teAttributeDef2.setText(attr.defn2)
        self.teAttributeSource2.setText(attr.dsrc2)
        self.teAttributeDef3.setText(attr.defn3)
        self.teAttributeSource3.setText(attr.dsrc3)
        # NOTE(review): ucnt and dtype are computed but unused here
        ucnt = self.csv_reader.number_of_Uvalues_per_col(self.attr_type)
        dtype = str(attr.dtype)
        # checking/selecting radio button depending upon the type
        # determined for given cols datatype and unique counts
        # NOTE(review): ft from det_rb_type is immediately overwritten by
        # the stored attr.field_type — confirm which one is intended
        ft = self.det_rb_type(self.attr_type)
        ft = attr.field_type
        if ft:
            self.dict_rbs[ft].setChecked(True)
        else:
            print("WARNING :: No radio buttion selected for '%s'" %
                  self.attr_type)
            #FIXME - unchecking all radio buttons or suggest me what to do !!!!
            self.uncheck_all_radio_buttons()
        self.showCurrentAttributeDataInWidgets()

    def uncheck_all_radio_buttons(self):
        # Python 2 dict iteration (iteritems)
        for k, v in self.dict_rbs.iteritems():
            self.dict_rbs[k].setChecked(False)

    def show_range_widget(self):
        self.widRange.setHidden(False)
        self.horizontalLayout.addWidget(self.widRange)

    def show_codeset_widget(self):
        self.widCodeset.setHidden(False)
        self.horizontalLayout.addWidget(self.widCodeset)

    def show_unrepresentable_widget(self):
        self.widUnrepresentable.setHidden(False)
        self.horizontalLayout.addWidget(self.widUnrepresentable)

    def show_enumerated_widget(self):
        self.widEnumerated.setHidden(False)
        self.horizontalLayout.addWidget(self.widEnumerated)

    def handleXmlTemplate(self):
        """ Browse and select the input xml file """
        print('Xml Template')
        filenames = getFileFromDialog()
        self.teXmlTemplate.setText(filenames[0])

    def handleBrowseData(self):
        """ Browse and read the input csv file """
        # set old one to null
        self.csv_reader = None
        self.attr_type = None
        self.attrs = {}
        self.lstItems.clear()
        self.lstEnumUnique.clear()
        self.field_type = None
        filenames = getFileFromDialog()
        self.teBrowseData.setText(filenames[0])
        # creating csv reader
        self.csv_path = filenames[0]
        self.csv_reader = CsvReader(self.csv_path)
        # adding list items on left side
        for col in self.csv_reader.get_col_pd_names():
            self.lstItems.addItem(col)
        # select first row if there are elements in list
        if self.lstItems.count() > 0:
            self.lstItems.setCurrentRow(0)
            self.attr_type = self.lstItems.currentItem().text()
        self.refresh_enumerated_list_values()
        self.populate_attributes_data()
        self.showCurrentAttributeDataInWidgets()

    def det_rb_type(self, col):
        """ Function determines the radio button type depending upon the
        datatype and count of unique values in the given col in csv

        return: ENUMERATED, RANGE, CODESET, UNREPRESENTABLE

        Note: Function raise an exception if rb type can't be determined """
        ucnt = self.csv_reader.number_of_Uvalues_per_col(col)
        dtype = str(self.csv_reader.det_datatypes_col(col))
        # checking float64 columns for radio buttons
        if dtype == "float64":
            """ Unrepresentable if the unique values are less than 23 and
            the datatype is float64. """
            if ucnt < 23:
                return UNREPRESENTABLE
            """ Range radio button to be selected if there are 23-40 unique
            vales and the datatype is float64. """
            if ucnt >= 23 and ucnt <= 40:
                return RANGE
            """ Codeset radio button to be selected if there are more than
            40 unique values and the datatype is float64. """
            if ucnt > 40:
                return CODESET
            #FIXME
            # what to return in else case ?????
        # checking object columns for radio buttons
        if dtype == "object":
            """ Enumerated if the datatype is 'object' and there are less
            than 20 unique values. """
            if ucnt < 20:
                return ENUMERATED
        ################################################
        #                                              #
        # NOTE                                         #
        # Default is UNREPRESENTABLE type when we cant #
        # Determine radio button type as above         #
        #                                              #
        ################################################
        return UNREPRESENTABLE

    def populate_attributes_data(self):
        """ Function populates attributes info in data structure i.e. a
        python dict for AttributeInfo objects and their associated data """
        # clearing previous data from dict
        self.attrs = {}
        self.attr_type = None
        firstTime = True
        # create attribute info for each col in csv
        for col in self.csv_reader.get_col_pd_names():
            print("[INFO] populating data for: %s" % col)
            # creating attribute info
            ai = AttributeInfo(col)
            ai.field_type = self.det_rb_type(col)
            # ensure field type
            if not ai.field_type:
                print(
                    "[WARNING] :: cant populate attribute info for '%s', as field type is null"
                    % col)
                continue
                #FIXME or add a fallback field type UNREPRESENTABLE !
            ai.set_unique_count(self.csv_reader.number_of_Uvalues_per_col(col))
            ai.set_data_type(self.csv_reader.det_datatypes_col(col))
            ai.set_defn1("%s %s" % (ATTRIBUTE_DEFINITION, col))
            ai.set_dsrc1("%s %s" % (DATA_SOURCE, col))
            ai.rmin = self.csv_reader.get_col_min(col)
            ai.rmax = self.csv_reader.get_col_max(col)
            ai.en_unique_index = 0
            ai.en_unique_val = ai.rmin
            # check radio button and set val for range min/max textboxes for first attribute
            if firstTime:
                firstTime = False
                self.dict_rbs[ai.field_type].setChecked(True)
                self.teRangeMax.setText(str(ai.rmax))
                self.teRangeMin.setText(str(ai.rmin))
            # lets see the attribute info
            ai.show()
            # keeping attribute info in dict
            self.attrs[col] = ai

    def refresh_enumerated_list_values(self):
        """ Rebuild the enumerated-values list for the current attribute. """
        self.lstEnumUnique.clear()
        if not self.csv_reader:
            print("[WARNING] :: No csv loaded to read values")
            return
        if not self.attr_type:
            print(
                "[WARNING] :: refresh_enumerated_list_values() cant find current attribute"
            )
            return
        # adding unique values in enumerated widget's list widget
        for val in self.csv_reader.get_unique_values_col(self.attr_type):
            self.lstEnumUnique.addItem(str(val))
        if self.attrs:
            attr = self.attrs[self.attr_type]
            if attr.en_unique_val:
                self.lstEnumUnique.setCurrentRow(attr.en_unique_index)
            return
        # select first row if there are elements in list
        elif self.lstEnumUnique.count() > 0:
            self.lstEnumUnique.setCurrentRow(0)

    def handleSave(self):
        if not self.teXmlTemplate.toPlainText():
            print("[ERROR] :: XML output not specified")
            self.show_msgbox("Error", "Output Xml file not specified !")
            return
        ##########################################################
        #                                                        #
        # Create xml writer and pass ui to it , XML Writer will  #
        # fetch data from ui and save/update in the output xml   #
        #                                                        #
        ##########################################################
        # NOTE(review): teCitation is passed twice — confirm the second
        # argument should not be teOverview
        self.xml_writer = XmlWriter(self.teXmlTemplate.toPlainText())
        self.xml_writer.save(self.attrs, self.teCitation.toPlainText(),
                             self.teCitation.toPlainText())

    def handleSaveClose(self):
        self.handleSave()
        QApplication.quit()

    def handleCancel(self):
        QApplication.quit()

    def fieldTypeChanged(self):
        """ Show the dynamic widget matching the checked radio button and
        record the new field type on the current attribute. """
        self.widRange.setHidden(True)
        self.widCodeset.setHidden(True)
        self.widUnrepresentable.setHidden(True)
        self.widEnumerated.setHidden(True)
        # remove all widgets from horizontal layout
        for i in reversed(range(self.horizontalLayout.count())):
            self.horizontalLayout.removeWidget(
                self.horizontalLayout.itemAt(i).widget())
        if self.rbEnumerated.isChecked():
            self.field_type = ENUMERATED
            self.show_enumerated_widget()
        if self.rbRange.isChecked():
            self.field_type = RANGE
            self.show_range_widget()
        if self.rbCodeset.isChecked():
            self.field_type = CODESET
            self.show_codeset_widget()
        if self.rbUnrepresentable.isChecked():
            self.field_type = UNREPRESENTABLE
            self.show_unrepresentable_widget()
        ################################################
        #                                              #
        # Update field type of selected attribute when #
        # a radio button is changed                    #
        #                                              #
        ################################################
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] fieldTypeChanged() :: attributes dict is not initialized"
            )
            return
        # beacuse of async signal/slot for various ui elements
        # would be good to perform this check
        if not self.attr_type and self.lstItems.count() > 0:
            self.attr_type = self.lstItems.currentItem().text()
        self.printAttrs()
        self.attrs[self.attr_type].field_type = self.field_type
        self.showCurrentAttributeDataInWidgets()

    def printAttrs(self):
        """ convenience funciton to watch attributes """
        print("-------------[ Attributes ]-------------------")
        # Python 2 dict iteration (iteritems)
        for k, v in self.attrs.iteritems():
            print(k, v)
        print("----------------------------------------------")

    def showCurrentAttributeDataInWidgets(self):
        """ Function display the data of current selected attribute in the
        dynamic widgets on the right side """
        if not self.attr_type:
            return
        print("[showCurrentAttributeDataInWidgets]")
        attr = self.attrs[self.attr_type]
        # showing data in enumerated widget
        if attr.en_def:
            self.teEnumDef.setText(attr.en_def)
        else:
            self.teEnumDef.setText("")
        if attr.en_src:
            self.teEnumSrc.setText(attr.en_src)
        else:
            self.teEnumSrc.setText("")
        if attr.en_unique_val:
            self.lstEnumUnique.setCurrentRow(attr.en_unique_index)
        # showing data in range widget
        if attr.rmin:
            self.teRangeMin.setText(str(attr.rmin))
        else:
            self.teRangeMin.setText("")
        if attr.rmax:
            self.teRangeMax.setText(str(attr.rmax))
        else:
            self.teRangeMax.setText("")
        if attr.unit:
            self.teUnit.setText(str(attr.unit))
        else:
            self.teUnit.setText("")
        if attr.resolution:
            self.teResolution.setText(str(attr.resolution))
        else:
            self.teResolution.setText("")
        # showing data in codeset widget
        if attr.cs_name:
            self.teCodesetName.setText(str(attr.cs_name))
        else:
            self.teCodesetName.setText("")
        if attr.cs_src:
            self.teCodesetSource.setText(str(attr.cs_src))
        else:
            self.teCodesetSource.setText("")
        # showing data unrepresentable widget
        if attr.ur_desc:
            self.teUnreprDesc.setText(str(attr.ur_desc))
        else:
            self.teUnreprDesc.setText("")

    def handleAttributeDefChanged1(self):
        """ Function is called when ever data is changed in "attribute
        definition 1" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged1() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn1 = self.teAttributeDef1.toPlainText()

    def handleAttributeDefChanged2(self):
        """ Function is called when ever data is changed in "attribute
        definition 2" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged2() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn2 = self.teAttributeDef2.toPlainText()

    def handleAttributeDefChanged3(self):
        """ Function is called when ever data is changed in "attribute
        definition 3" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeDefChanged3() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition value for current attribute
        self.attrs[self.attr_type].defn3 = self.teAttributeDef3.toPlainText()

    def enumDefinitionChanged(self):
        """ Function is called when ever data is changed in enumerated
        definition """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumDefinitionChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[self.attr_type].en_def = self.teEnumDef.toPlainText()

    def enumSourceChanged(self):
        """ Function is called when ever data is changed in enumerated
        value """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumSourceChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[self.attr_type].en_src = self.teEnumSrc.toPlainText()

    def enumUniqueChanged(self):
        """ Function is called when ever data is changed in enumerated
        unique list """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] enumUniqueChanged() :: attributes dict is not initialized"
            )
            return
        if not self.lstEnumUnique.count():
            print("[WARNING] enumUniqueChanged() :: lst is empty")
            return
        if not self.lstEnumUnique.currentItem():
            print("[WARNING] enumUniqueChanged() :: lst has not item selected")
            return
        self.ensureAttributeType()
        # update enum unique value for current attribute
        self.attrs[
            self.attr_type].en_unique_index = self.lstEnumUnique.currentRow()
        self.attrs[
            self.attr_type].en_unique_val = self.lstEnumUnique.currentItem(
            ).text()

    def handleAttributeSourceChanged1(self):
        """ Function is called when ever data is changed in "attribute
        definition source 1" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged1() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc1 = self.teAttributeSource1.toPlainText()

    def handleAttributeSourceChanged2(self):
        """ Function is called when ever data is changed in "attribute
        definition source 2" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged2() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc2 = self.teAttributeSource2.toPlainText()

    def handleAttributeSourceChanged3(self):
        """ Function is called when ever data is changed in "attribute
        definition source 3" text box """
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleAttributeSourceChanged3() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update definition source value for current attribute
        self.attrs[
            self.attr_type].dsrc3 = self.teAttributeSource3.toPlainText()

    def handleRangeMinChanged(self):
        """ Function is called when ever range min is changed """
        print("[handleRangeMinChanged] range min: ",
              self.teRangeMin.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleRangeMinChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range min value for current attribute
        self.attrs[self.attr_type].rmin = self.teRangeMin.toPlainText()

    def handleRangeMaxChanged(self):
        """ Function is called when ever range max is changed """
        print("[handleRangeMaxChanged] range max: ",
              self.teRangeMax.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleRangeMaxChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range max value for current attribute
        self.attrs[self.attr_type].rmax = self.teRangeMax.toPlainText()

    def handleUnitChanged(self):
        """ Function is called when ever range unit is changed """
        print("[handleUnitChanged] range unit: ", self.teUnit.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleUnitChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range unit value for current attribute
        self.attrs[self.attr_type].unit = self.teUnit.toPlainText()

    def handleResolutionChanged(self):
        """ Function is called when ever range resolution is changed """
        print("[handleResolutionChanged] resolution: ",
              self.teResolution.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleResolutionChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update range resolution value for current attribute
        self.attrs[self.attr_type].resolution = self.teResolution.toPlainText()

    def handleCodesetNameChanged(self):
        """ Function is called when ever codeset name is changed """
        print("[handleCodesetNameChanged] codeset name: ",
              self.teCodesetName.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleCodesetNameChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update codeset name
        self.attrs[self.attr_type].cs_name = self.teCodesetName.toPlainText()

    def handleCodesetSourceChanged(self):
        """ Function is called when ever codeset source is changed """
        # NOTE(review): logs teCodesetName, not teCodesetSource — confirm
        print("[handleCodesetSourceChanged] codeset source: ",
              self.teCodesetName.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleCodesetSourceChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update codeset source
        self.attrs[self.attr_type].cs_src = self.teCodesetSource.toPlainText()

    def handleUnreprensentableChanged(self):
        """ Function is called when ever UNREPRESENTABLE desc is changed """
        print("[handleUnreprensentableChanged] unrepresentable: ",
              self.teUnreprDesc.toPlainText())
        # if attributes dict is not initialized not need to continue
        if not self.attrs:
            print(
                "[WARNING] handleUnreprensentableChanged() :: attributes dict is not initialized"
            )
            return
        self.ensureAttributeType()
        # update unrepresentable value for current attribute
        self.attrs[self.attr_type].ur_desc = self.teUnreprDesc.toPlainText()

    def show_msgbox(self, title, text):
        """ Function for showing error/info message box """
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Information)
        msg.setText(text)
        msg.setWindowTitle(title)
        msg.setStandardButtons(QMessageBox.Ok)
        retval = msg.exec_()
        print("[INFO] Value of pressed message box button:", retval)
def main():
    '''
    This is the entrypoint of the whole application. From the CMD, the user
    can specify either 'train' or 'test' after 'python app.py' to choose the
    mode which they would like to execute.
    '''
    # handle arguments from CMD
    argument_parser = argparse.ArgumentParser(
        description='Choose whether you want to train network or test network')
    argument_parser.add_argument('config',
                                 help='choose whether to train or test network')
    args = argument_parser.parse_args().config
    print('the inputted args is', args)

    # FIX: the original only rejected args when it was None, which argparse
    # never produces for a required positional.  Validate the value up front
    # so we do not load and preprocess the whole data set before discovering
    # an unsupported mode.
    if args not in ('train', 'test'):
        print('This configuration is unsupported. Please enter [train] or [test]')
        return

    # file paths for training data
    faces_file_path = 'face-images-with-marked-landmark-points/face_images.npz'
    csv_file_path = 'face-images-with-marked-landmark-points/facial_keypoints.csv'

    # helper classes
    image_reader = ImageReader(faces_file_path)
    csv_reader = CsvReader(csv_file_path)

    image_data = image_reader.get_array()
    image_data = image_data['face_images']
    # move the last axis of the stored array to the front so iteration is
    # per-image
    image_data = np.moveaxis(image_data, -1, 0)
    # Must map all the images into 3-channels
    image_data = map_images_to_3_channels(image_data)

    # this is a dataframe of facial keypoints
    df = csv_reader.get_facial_features()

    # This gets all the coordinates from the dataframe
    coordinates_list = extract_coordinates_list_unzipped(df)
    coordinates_list = np.array(coordinates_list)
    coordinates_list = scale_coordinates(coordinates_list)

    # validation to ensure data is clean
    validate_data(image_data, coordinates_list)

    # extracting and separating data as appropriate
    X_train, y_train, X_test, y_test = split(image_data, coordinates_list)

    if args == 'train':
        print('[train] configuration selected. Training.')
        # augmented training data saved by the generation script overrides
        # the freshly split X_train/y_train
        loader = Loader()
        X_train = loader.load_from_filepath('images.npy')
        y_train = loader.load_from_filepath('coordinates_list.npy')
        print('images size', len(X_train))
        print('coordinates size', len(y_train))
        assert len(X_train) == len(y_train)
        train(get_custom_model(), X_train, y_train, X_test, y_test)
    else:  # args == 'test' (validated above)
        print('[test] configuration selected. Testing.')
        model = get_trained_model('modelMKIII.h5')
        prediction = predict_images(model, X_test)
        assert len(X_test) == len(prediction)
        draw_images(X_test, prediction)
# --- feature-set construction for the classifier (Python 2 script) ---
# NOTE(review): this chunk starts mid-script; unigram_capacity,
# bigram_capacity and the read_*_for_feature helpers are defined above
# this excerpt -- confirm against the full file.
use_gender = True

# capacity-limited n-gram feature lists
unigrams = read_unigrams_for_feature(unigram_capacity)
print ">>", len(unigrams), 'unigram features:', unigrams
bigrams = read_bigrams_for_feature(bigram_capacity)
print ">>", len(bigrams), 'bigram features:', bigrams

# capacity 0 presumably means "no limit", yielding the full bag
# (sizes noted by the original author: 84 words, 5 bigrams) -- TODO confirm
bag_of_words = read_unigrams_for_feature(0)  # 84
print ">> a bag of words of size ", len(bag_of_words), ":", bag_of_words
bag_of_bigrams = read_bigrams_for_feature(0)  # 5
print ">> a bag of bigrams of size ", len(
    bag_of_bigrams), ":", bag_of_bigrams

print "Training data loading..."
training_data = CsvReader(constants.TRAINING_DATA_FILE)
print "Training data loaded"

print "Creating the topic model..."
topic_model_feature = TopicModelFeature()
#topic_model_feature = None
print "Creation of the topic model done"

# first argument 0 selects which trait/label to process -- TODO confirm
# against process_training_data's signature
all_features, neu_labels = process_training_data(0, training_data, unigrams,
                                                 bigrams, bag_of_words,
                                                 bag_of_bigrams,
                                                 topic_model_feature)

print "Testing data loading..."
# second argument appears to flag the reader as test data -- assumption,
# verify against CsvReader
test_data = CsvReader(constants.TEST_DATA_FILE, True)
import sys import constants from csv_reader import CsvReader if __name__ == "__main__": weka_output = sys.argv[1] neu_indices = list() for line in open(weka_output, 'r'): row = line.split() row = [r.strip() for r in row] if float(row[2]) >= 28: neu_indices.append(int(row[0]) - 1) # Weka indices start from 1 print neu_indices test_data = CsvReader(constants.TEST_DATA_FILE, True) author_ids_test = test_data.get_author_ids() print len(neu_indices) print "class,predictions" print "+, ", for i in range(0, len(neu_indices)): print author_ids_test[neu_indices[i]],
def test_read_data_from_csv_reader(self):
    """CsvReader.read() returns the fixture rows as a list of dicts."""
    expected = [{'key_1': 'value_1', 'key_2': 'value_2'}]
    self.assertEqual(CsvReader('test/test.csv').read(), expected)
def get_feature_by_punctuation(self, punc):
    """Count occurrences of one punctuation character per author.

    :param punc: single character to count
    :return: list with one total per author, in the order returned by
             self._data_set.init_status_group_by_author()
    """
    aut_statuses = self._data_set.init_status_group_by_author()
    # str.count replaces the original hand-rolled char-by-char nested loop;
    # same totals, one C-level pass per status
    return [sum(status.count(punc) for status in statuses)
            for statuses in aut_statuses]


if __name__ == "__main__":
    # smoke test: load the training set and pull author ids / NEU labels
    # (Python 2 script; the large block of commented-out exploratory probes
    # that followed has been removed)
    training_data = CsvReader(constants.TRAINING_DATA_FILE)
    fr = WordFeature(training_data)
    author_ids, neus = training_data.get_author_ids_and_neu_labels()
def import_gdit_genes(db):
    """Load the GDIT (Lek) gene table into db.hw.gdit_genes, indexed by gene."""
    reader = CsvReader(GENE_DISCOVERY_INFORMATION_TOOLKIT_LEK)
    reader.import_to_db(db.hw, 'gdit_genes')
    db.hw.gdit_genes.create_index([('gene', pymongo.ASCENDING)], name='gene_1')
import sys
sys.path.insert(0, '../library')
from csv_reader import CsvReader

# Grade one assignment question via the shared CsvReader helper.
task = "hw1"
# replace with absolute address of assignments folder
base_dir = "../hw1"
grade_suffix = "q1"  # question identifier within the assignment
scale = 10  # grading scale passed to read() -- presumably max points, TODO confirm

new_task = CsvReader(task, base_dir)
new_task.read(grade_suffix, scale)
# -*- coding: cp936 -*- """ 生成各年的联合专利邻接矩阵 """ import os import sys from csv_reader import CsvReader from graph_maker import GraphMaker default_encoding = "utf-8" if default_encoding != sys.getdefaultencoding(): reload(sys) sys.setdefaultencoding(default_encoding) YEAR = ['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016'] INPUT_CSV = u'2007-2016医药卫生行业数据/2007-2016全国医药卫生联合专利.csv' PREFIX_LABEL = 'AM_医药卫生' if __name__ == '__main__': assert (os.path.exists(INPUT_CSV)), 'Such file \"' + INPUT_CSV + '\" does not exists!' for y in YEAR: y = str(y) my_reader = CsvReader(INPUT_CSV, start_time=y + '-01-01', end_time=y + '-12-31') print u'生成' + y + u'年邻接矩阵' my_graph_maker = GraphMaker(my_reader.joint_applicant_list()) my_graph_maker.write_adjacent_matrix(PREFIX_LABEL + '_' + y + '.csv')
import sys import constants from csv_reader import CsvReader if __name__ == "__main__": weka_output = sys.argv[1] neu_indices = list() for line in open(weka_output, 'r'): row = line.split() row = [r.strip() for r in row] if float(row[2]) >= 28: neu_indices.append(int(row[0]) - 1) # Weka indices start from 1 print neu_indices test_data = CsvReader(constants.TEST_DATA_FILE, True) author_ids_test = test_data.get_author_ids() print len(neu_indices) print "class,predictions" print "+, ", for i in range(0, len(neu_indices)): print author_ids_test[neu_indices[i]],
from csv_reader import CsvReader
from DataScientistMethod import train_split_data
from model import IrisModel

# load data
data = CsvReader.create_from_csv(
    "/Users/rklis/Projects/PSD_training/projectSprint/iris.csv", sep=";")

# data preparation: hold out 30% of rows, stratified on the 'iris_type' label
test_data, to_learn = train_split_data(data, 'iris_type', 0.3)

irisModel = IrisModel(test_data, to_learn, data.get_types_of_data('iris_type'))
irisModel.learn()

# Run the model over every held-out row.  The original loop stashed each
# prediction in a throwaway local `a` and set `b = 1` (debugger breakpoint
# filler); both dead locals and the commented-out os.path probe are removed.
for item in test_data:
    irisModel.predict(item)
# Chart-drawing playground: each import below has a matching (mostly
# commented-out) demo call; uncomment the one you want to render.
from multipline import multiline
from bar import bar
from piechart import PieChart
from stackplot import StackPlot
from lineplot import LinePlot
from histogram import Histogram
from scatter import Scatter

# multiline()
# bar()

from csv_reader import CsvReader
from random import randint

# movie statistics source
csv_reader = CsvReader('movies.csv')
csv_reader.read_csv()
# csv_reader.draw_films_by_years()
# csv_reader.draw_films_by_profits()

pie_chart = PieChart()
pie_chart.draw_pie_chart()

# stack_plot = StackPlot()
# stack_plot.draw_stack_plot()

# line_plot = LinePlot()
# line_plot.draw_line_plot()

# histogram_plot = Histogram()
# histogram_plot.draw_histogram()

# NOTE(review): constructed but all of its draw calls are commented out
scatter_plot = Scatter()
# scatter_plot.draw_scatter()
# scatter_plot.create_fake_csv()
# scatter_plot.draw_scatter_from_csv()
# Parse an Alipay transaction-export CSV into AliCsvRowData records.
# (Python 2 script: uses `unicode` and byte-string .decode('utf8').)
from csv_entity.ali_csv import AliCsvRowData
from csv_reader import CsvReader

# NOTE(review): `logging` is used below but its import is not visible in
# this excerpt -- presumably imported above; confirm against the full file.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

csv_path = 'alipay/test.csv'

# if os.path.isfile(csv_path):
#     with open(csv_path, "r") as csvfile:
#         reader = csv.reader(csvfile)
#         # readlines is not needed here (translated from Chinese)
#         for line in reader:
#             print line

logger.debug("---")
reader = CsvReader(csv_path)
data_rows = reader.reader_csv()

data_list = []
for row in data_rows:
    # skip the header row; u'交易号' is the "transaction id" column title
    if unicode.strip(row[0].decode('utf8')) == u'交易号':
        continue
    else:
        # one record per CSV row; the export has 16 columns
        data = AliCsvRowData(row[0], row[1], row[2], row[3], row[4], row[5],
                             row[6], row[7], row[8], row[9], row[10], row[11],
                             row[12], row[13], row[14], row[15])
        logger.debug(data)
        data_list.append(data)

logger.debug(len(data_list))
import numpy as np  # FIX: `np` was used below but never imported in this view

from csv_reader import CsvReader
from img_reader import ImageReader

'''
This is the entrypoint for data generation. It extracts data from given
files of facial images and corresponding feature coordinates, generating
and saving data as *.npy files after performing data augmentation.
'''

# NOTE(review): map_images_to_3_channels, extract_coordinates_list_unzipped
# and scale_coordinates_list are also unresolved in this excerpt -- confirm
# they are imported from a helpers module in the full file.

# file paths for training data
faces_file_path = 'face-images-with-marked-landmark-points/face_images.npz'
csv_file_path = 'face-images-with-marked-landmark-points/facial_keypoints.csv'

# helper classes
image_reader = ImageReader(faces_file_path)
csv_reader = CsvReader(csv_file_path)

image_data = image_reader.get_array()
image_data = image_data['face_images']
# move the last axis of the stored array to the front so iteration is per-image
image_data = np.moveaxis(image_data, -1, 0)
# Must map all the images into 3-channels
image_data = map_images_to_3_channels(image_data)

# this is a dataframe of facial keypoints
df = csv_reader.get_facial_features()

# this gets all the coordinates from the dataframe, scaled for training
coordinates_list = extract_coordinates_list_unzipped(df)
coordinates_list = np.array(coordinates_list)
coordinates_list = scale_coordinates_list(coordinates_list)