def analysisSheet2List(self, sheet: sheet.Sheet, fieldlist: list):
    """Parse a worksheet into a list of row dictionaries.

    The header row is located via mergedCell(); header cells whose text
    appears in ``fieldlist`` define the column mapping.  Every data row
    below the header is converted with line2Dic(); rows that come back
    empty/falsy are dropped.  Returns [] for an empty sheet.
    """
    if sheet.nrows == 0:
        return []
    merged, header_row = self.mergedCell(sheet)
    # Map field title -> column index, resolving merged cells to their anchor.
    column_map = {}
    for col in range(len(sheet.row_values(header_row))):
        cell = sheet.cell(header_row, col)
        if (header_row, col) in merged:
            cell = sheet.cell(*merged[(header_row, col)])
        title = self.cell2Str(cell)
        if title in fieldlist:
            column_map[title] = col
    # Convert each data row; keep only non-empty results.
    parsed_rows = (
        self.line2Dic(sheet, rownum, column_map)
        for rownum in range(header_row + 1, sheet.nrows)
    )
    return [rowdic for rowdic in parsed_rows if rowdic]
def line2Dic(self, sheet: sheet.Sheet, row, headMap: dict):
    """Convert one worksheet row into a dict keyed by field title.

    headMap maps field title -> column index; entries whose index is
    None or negative are skipped.  Cell text is normalized via
    transF2H().  Returns None when every mapped cell is empty, so the
    caller can discard blank rows.
    """
    retdic = {}
    empty = True
    # Iterate title/column pairs directly instead of indexing the dict.
    for key, col in headMap.items():
        # Fix: compare to None with `is`, not `==` (PEP 8; avoids
        # surprises with objects overriding __eq__).
        if col is None or col < 0:
            continue
        sval = self.cell2Str(sheet.cell(row, col))
        if sval and empty:
            empty = False
        retdic[key] = self.transF2H(sval)
    return None if empty else retdic
def buildNeihboorhoodProfiles(): workbook = xlrd.open_workbook(inpfile) print(inpfile) neighboorlist = [] for sheet in workbook.sheets(): idx = 0 r, c, field, idx = getNeighboorhoodField(idx) neighboor_fields = {} neighboor_fields['neighboorhood'] = sheet.name for row in range(sheet.nrows): for col in range(sheet.ncols): if row == r and col == c: neighboor_fields[field] = sheet.cell(r, c).value r, c, field, idx = getNeighboorhoodField(idx) if r < 0: neighboorlist.append(neighboor_fields) break return neighboorlist
def handle_date(path, pro_name, pro_type, pro_num):
    """Parse a payment-plan Excel workbook into a list of row dicts.

    path: workbook path (only the first worksheet is read; data rows
        start at row index 2, headers are on row 1).
    pro_name, pro_type, pro_num: product name / model / unit count,
        copied verbatim into every row dict.
    Returns a list of dicts, one per accepted data row.
    """
    print('解析 excel:{} 开始......'.format(path))
    data_arr = []
    excel_path = str(path)
    book = xlrd.open_workbook(excel_path)
    sheet = book.sheet_by_index(0)
    rows = sheet.nrows
    cols = sheet.ncols
    print('sheet rows,cols:', rows, cols)
    for i in range(2, sheet.nrows):
        first_cell = sheet.cell(i, 0)
        # A non-numeric, non-empty first cell marks an irrelevant row.
        if not is_number(first_cell.value) and first_cell.value != '':
            continue
        data = {
            '产品名称': pro_name,
            '型号': pro_type,
            '台数': pro_num,
            '公司名称': '',
            '结算方式': '',
            '预计交货时间': ''
        }
        # Company name may be blank on continuation rows; fall back to the row above.
        data['公司名称'] = sheet.row_values(i - 1)[1] if sheet.row_values(i)[1] == '' else sheet.row_values(i)[1]
        data['结算方式'] = sheet.row_values(i)[4]
        sub_time = sheet.cell(i, 7)
        # xlrd ctype 2 = number, 3 = xldate: both hold Excel date serials here.
        if sub_time.ctype in (2, 3):
            data['预计交货时间'] = xldate_as_datetime(sub_time.value, 0).strftime('%Y-%m-%d')
        else:
            data['预计交货时间'] = sheet.row_values(i)[7]
        for col in range(8, cols):
            # Blank header column: skip.
            if sheet.cell(1, col).value == '':
                continue
            time_result = time_formate(sheet.cell(1, col))
            if time_result is None:
                # Non-time columns (supplier main business, remarks) are discarded.
                continue
            # A header of <= 4 digits is a month/day without a year; 6-digit
            # headers already carry the year.
            if col > 8 and len(time_result) <= 4:
                before_time = time_formate(sheet.cell(1, col - 1))
                # Time columns are expected to increase monotonically; when a
                # value drops below its predecessor the sequence has crossed
                # into the next year.
                if before_time is not None and int(before_time) < int(time_result):
                    print(time_result + '转换为: 2019' + time_result)
                    time_result = '2019' + time_result
                elif before_time is not None and int(before_time) > int(time_result):
                    print(time_result + '转换为: 2020' + time_result)
                    time_result = '2020' + time_result
            elif len(time_result) <= 4:
                # First time column: assume the base year.
                print(time_result + '转换为: 2019' + time_result)
                time_result = '2019' + time_result
            data[time_result] = sheet.row_values(i)[col]
        data_arr.append(data)
        print(data)
    print('size:', len(data_arr), data_arr)
    print('解析 excel:{} 结束......'.format(path))
    return data_arr
def create_outputfile(prxdoc, inputfiles_element, inputfilehref,
                      nominal_outputfilehref, outputfilehref, outputdict,
                      ignore_locking):
    """Create the output XML file from the raw input by running any
    filters, etc.  It will be presumed that the output XML file will
    eventually be referred to by nominal_outputfilehref, but the actual
    file written will be outputfilehref.

    Returns (canonhash, timestamp): canonhash is a sha256 hex digest of
    the canonicalized output for the fragment-extraction branch and None
    otherwise; timestamp is the input file's mtime as an ISO-8601 string.
    """
    # print("inputfilehref=%s" % (inputfilehref.humanurl()))
    if inputfilehref.get_bare_unquoted_filename().lower().endswith(
            ".xls") or inputfilehref.get_bare_unquoted_filename().lower(
            ).endswith(".xlsx"):
        # Spreadsheet input: convert it to an ls:sheet XML document.
        try:
            import xlrd
            import xlrd.sheet
            inputfileelement = outputdict[inputfilehref].inputfileelement
            # Any dc: namespace elements within the inputfileelement
            # will get placed in a dc:summary tag
            timestamp = datetime.datetime.fromtimestamp(
                os.path.getmtime(inputfilehref.getpath()),
                lm_timestamp.UTC()).isoformat()
            spreadsheet = xlrd.open_workbook(inputfilehref.getpath())
            sheetname = prxdoc.getattr(inputfileelement, "sheetname",
                                       spreadsheet.sheet_names()[0])
            sheet = spreadsheet.sheet_by_name(sheetname)
            # "titlerow" attribute is 1-based; convert to 0-based index.
            titlerow = int(prxdoc.getattr(inputfileelement, "titlerow",
                                          "1")) - 1
            nrows = sheet.nrows
            ncols = sheet.ncols
            rawtitles = [
                str(sheet.cell(titlerow, col).value).strip()
                for col in range(ncols)
            ]
            # Column titles become element names; a trailing unit suffix
            # (split off by splitunits) becomes a dcv:units attribute.
            tagnames = [
                convert_to_tagname(splitunits(rawtitle)[0])
                if rawtitle is not None and len(rawtitle) > 0 else "blank"
                for rawtitle in rawtitles
            ]
            unitnames = [
                convert_to_tagname(splitunits(rawtitle)[1])
                if rawtitle is not None and len(rawtitle) > 0 else None
                for rawtitle in rawtitles
            ]
            nsmap = copy.deepcopy(prx_nsmap)
            nsmap["ls"] = "http://limatix.org/spreadsheet"
            outdoc = xmldoc.xmldoc.newdoc("ls:sheet",
                                          nsmap=nsmap,
                                          contexthref=outputfilehref)
            # Copy dc: namespace elements within inputfileelement
            # into a dc:summary tag
            inputfileel_children = prxdoc.children(inputfileelement)
            summarydoc = None
            for inputfileel_child in inputfileel_children:
                if prxdoc.gettag(inputfileel_child).startswith("dc:"):
                    if summarydoc is None:
                        # place in document with same context as where it came from
                        summarydoc = xmldoc.xmldoc.newdoc(
                            "dc:summary",
                            nsmap=nsmap,
                            contexthref=prxdoc.getcontexthref())
                    summarydoc.getroot().append(
                        copy.deepcopy(inputfileel_child))
            if summarydoc is not None:
                # shift summary context and then copy it into outdoc
                summarydoc.setcontexthref(outdoc.getcontexthref())
                outdoc.getroot().append(copy.deepcopy(summarydoc.getroot()))
            # Copy spreadsheet table: one ls:row element per data row.
            for row in range(titlerow + 1, nrows):
                rowel = outdoc.addelement(outdoc.getroot(), "ls:row")
                rownumel = outdoc.addelement(rowel, "ls:rownum")
                outdoc.settext(rownumel, str(row))
                for col in range(ncols):
                    cell = sheet.cell(row, col)
                    cell_type = xlrd.sheet.ctype_text.get(
                        cell.ctype, 'unknown')
                    if cell_type == "empty":
                        continue
                    cellel = outdoc.addelement(rowel, "ls:" + tagnames[col])
                    outdoc.setattr(cellel, "ls:celltype", cell_type)
                    hyperlink = sheet.hyperlink_map.get((row, col))
                    if cell_type == "text" and hyperlink is None:
                        outdoc.settext(cellel, cell.value)
                    elif cell_type == "text" and hyperlink is not None:
                        # Do we need to do some kind of conversion on
                        # hyperlink.url_or_path()
                        outdoc.settext(cellel, cell.value)
                        hyperlink_href = dcv.hrefvalue(
                            hyperlink.url_or_path,
                            contexthref=inputfilehref)
                        hyperlink_href.xmlrepr(outdoc, cellel)
                    elif cell_type == "number":
                        if unitnames[col] is not None:
                            outdoc.setattr(cellel, "dcv:units",
                                           unitnames[col])
                        outdoc.settext(cellel, str(cell.value))
                    elif cell_type == "xldate":
                        # BUGFIX: xldate_as_tuple returns a 6-tuple which must
                        # be unpacked into the datetime constructor; passing
                        # the tuple as a single argument raised TypeError.
                        outdoc.settext(
                            cellel,
                            datetime.datetime(*xlrd.xldate_as_tuple(
                                cell.value,
                                spreadsheet.datemode)).isoformat())
                    elif cell_type == "bool":
                        outdoc.settext(cellel, str(bool(cell.value)))
                    elif cell_type == "error":
                        outdoc.settext(cellel, "ERROR %d" % (cell.value))
                    else:
                        raise ValueError("Unknown cell type %s" %
                                         (cell_type))
            # Did the user provide a prx:xslt href indicating
            # a transformation to apply?
            xslttag = prxdoc.xpathsinglecontext(
                outputdict[inputfilehref].inputfileelement,
                "prx:xslt",
                default=None)
            if xslttag is not None:
                # Replace outdoc with transformed copy
                outdoc = create_outputfile_process_xslt(
                    prxdoc, xslttag, inputfiles_element,
                    outputdict[inputfilehref].inputfileelement, outdoc)
            # Write out under new file name outputfilehref
            assert (outputfilehref != inputfilehref)
            outdoc.set_href(outputfilehref, readonly=False)
            outdoc.close()
            canonhash = None  # could hash entire input file...
        except ImportError:
            raise ImportError(
                "Need to install xlrd package in order to import .xls or .xlsx files"
            )
    elif inputfilehref.has_fragment():
        # input file url has a fragment... we're only supposed
        # to extract a portion of the file
        timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(inputfilehref.getpath()),
            lm_timestamp.UTC()).isoformat()
        if inputfilehref.fragless() == prxdoc.get_filehref():
            # special case where input file is .prx file
            inputfilecontent = prxdoc
        else:
            inputfilecontent = xmldoc.xmldoc.loadfile(
                inputfilehref.getpath())
        inputfileportion = inputfilehref.evaluate_fragment(inputfilecontent)
        if len(inputfileportion) == 0:
            raise ValueError(
                "Input URL %s fragment reference failed to resolve" %
                (inputfilehref.humanurl()))
        elif len(inputfileportion) > 1:
            raise ValueError(
                "Input URL %s fragment reference resolved to multiple elements"
                % (inputfilehref.humanurl()))
        # NOTE: prx_nsmap doesn't make much difference here because the
        # nsmap of the element is copied in; prx_nsmap just makes our
        # prefixes available through xmldoc.
        outdoc = xmldoc.xmldoc.copy_from_element(inputfilecontent,
                                                 inputfileportion[0],
                                                 nsmap=prx_nsmap)
        # Create canonicalization from unmodified outdoc so that we can hash it
        outdoc_canon = BytesIO()
        outdoc.doc.write_c14n(outdoc_canon,
                              exclusive=False,
                              with_comments=True)
        canonhash = hashlib.sha256(outdoc_canon.getvalue()).hexdigest()
        if inputfileportion[0] is outputdict[inputfilehref].inputfileelement:
            # special case where this input file href with fragment
            # points to its very tag -- the <inputfiles> tag in the prxfile.
            # auto-populate corresponding <outputfile> tags,
            # i.e. modify outdoc to make sure there is an <outputfile> tag
            # with an xlink:href for each inputfile
            assert (inputfilecontent.gettag(
                inputfileportion[0]) == "prx:inputfiles")
            # treat the root <inputfiles> tag as an inputfile
            outdoc_inputfiletags = [outdoc.getroot()]
            outdoc_inputfiletags.extend(outdoc.xpath("prx:inputfile"))
            for outdoc_inputfiletag in outdoc_inputfiletags:
                if outdoc_inputfiletag is outdoc.getroot(
                ) and not outdoc.hasattr(outdoc_inputfiletag, "xlink:href"):
                    # root prx:inputfiles tag has no xlink:href
                    assert (outdoc.gettag(outdoc_inputfiletag) ==
                            "prx:inputfiles")
                    outdoc_inputfilehref = inputfilehref  # subsegment of input file
                elif outdoc.hasattr(
                        outdoc_inputfiletag, "xlink:href"
                ) and outdoc_inputfiletag is not outdoc.getroot():
                    # specified input file
                    outdoc_inputfilehref = dcv.hrefvalue.fromxml(
                        outdoc, outdoc_inputfiletag)
                else:
                    raise ValueError(
                        "Bad <prx:inputfiles> or <prx:inputfile> tag at %s" %
                        (dcv.hrefvalue.fromelement(
                            outdoc, outdoc_inputfiletag).humanurl()))
                # all of these input file references should be keys to the
                # output dict because outputdict was made from the originals!
                assert (outdoc_inputfilehref in outputdict)
                # Find or create prx:outputfile tag
                outdoc_outputfiletag = outdoc.child(outdoc_inputfiletag,
                                                    "prx:outputfile")
                if outdoc_outputfiletag is None:
                    outdoc_outputfiletag = outdoc.addelement(
                        outdoc_inputfiletag, "prx:outputfile")
                # Ensure prx:outputfile tag has a hyperlink
                if not outdoc.hasattr(outdoc_outputfiletag, "xlink:href"):
                    outputdict[outdoc_inputfilehref].outputfilehref.xmlrepr(
                        outdoc, outdoc_outputfiletag)
        # Did the user provide a prx:xslt href indicating
        # a transformation to apply?
        xslttag = prxdoc.xpathsinglecontext(
            outputdict[inputfilehref].inputfileelement,
            "prx:xslt",
            default=None)
        if xslttag is not None:
            outdoc = create_outputfile_process_xslt(
                prxdoc, xslttag, inputfiles_element,
                outputdict[inputfilehref].inputfileelement, outdoc)
        # Write out selected portion under new file name outputfilehref
        assert (outputfilehref != inputfilehref)
        outdoc.set_href(outputfilehref, readonly=False)
        outdoc.close()
    else:
        # input file url has no fragment, not .xls or .xlsx: treat it as XML
        # -- extract the whole thing!
        # Do we have an input filter? ... stored as xlink:href in
        # <inputfilter> tag
        canonhash = None  # (we could hash the entire inputfile!)
        inputfilters = prxdoc.xpathcontext(
            outputdict[inputfilehref].inputfileelement, "prx:inputfilter")
        if len(inputfilters) > 1:
            raise ValueError(
                "Maximum of one <inputfilter> element permitted in .prx file")
        timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(inputfilehref.getpath()),
            lm_timestamp.UTC()).isoformat()
        xslttag = prxdoc.xpathsinglecontext(
            outputdict[inputfilehref].inputfileelement,
            "prx:xslt",
            default=None)
        if len(inputfilters) > 0:
            # have an input filter: run it.
            inputfilter = inputfilters[0]
            # Get path from xlink:href
            inputfilterhref = dcv.hrefvalue.fromxml(prxdoc, inputfilter)
            inputfilterpath = inputfilterhref.getpath()
            # build arguments
            inputfilterargs = [inputfilterpath]
            # pull attributes named param1, param2, etc. from inputfilter tag
            # BUGFIX: cnt is an int; "param" + cnt raised TypeError.
            cnt = 1
            while "param" + str(cnt) in inputfilter.attrib:
                inputfilterargs.append(inputfilter.attrib["param" + str(cnt)])
                cnt += 1
            # add input and output filenames as params to filter
            inputfilterargs.append(inputfilehref.getpath())
            inputfilterargs.append(outputfilehref.getpath())
            # Call input filter... will raise exception if input filter fails.
            # BUGFIX: check_call takes the argv list as ONE argument;
            # unpacking it with * pushed argv[1] into Popen's bufsize slot.
            subprocess.check_call(inputfilterargs)
        elif xslttag is not None:
            indoc = xmldoc.xmldoc.loadhref(inputfilehref,
                                           nsmap=prx_nsmap,
                                           readonly=True)
            outdoc = create_outputfile_process_xslt(
                prxdoc, xslttag, inputfiles_element,
                outputdict[inputfilehref].inputfileelement, indoc)
            # Write out under new file name outputfilehref
            assert (outputfilehref != inputfilehref)
            outdoc.set_href(outputfilehref, readonly=False)
            outdoc.close()
        else:
            # use shutil to copy input to output
            shutil.copyfile(inputfilehref.getpath(),
                            outputfilehref.getpath())
    return (canonhash, timestamp)
def transPerFile(self, infile, outfile):
    """Called on a per file basis from transformAll.

    Reads the first sheet of the Excel file ``infile`` and writes a
    museumPlusExport XML document to ``outfile``, one record element per
    spreadsheet row that carries a meaningful index value.
    """
    self.mtime = os.path.getmtime(infile)
    wb = xlrd.open_workbook(filename=infile, on_demand=True)
    sheet = wb.sheet_by_index(0)
    root = ET.Element(
        "museumPlusExport",
        attrib={
            "version": "2.0",
            "level": "dirty",
        },
    )
    tree = ET.ElementTree(root)
    columns = [sheet.cell(0, c).value for c in range(sheet.ncols)]
    base = os.path.basename(infile)
    # invalid xml characters: will be stripped
    remove_re = re.compile(u"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")
    # FIX: the record-type dispatch depends only on the file name, so it is
    # hoisted out of the row loop (it previously re-ran for every row).
    if re.match("so", base, flags=re.I):
        record_tag = "sammlungsobjekt"
        attrib = "objId"
    elif re.match("pk", base, flags=re.I):
        record_tag = "personKörperschaft"
        attrib = "kueId"
    elif re.match("mm", base, flags=re.I):
        record_tag = "multimediaobjekt"
        attrib = "mulId"
    elif re.match("aus", base, flags=re.I):
        record_tag = "ausstellung"
        attrib = "ausId"
    else:
        print("Error: Unknown file %s" % infile)
        sys.exit(1)
    for r in range(1, sheet.nrows):  # leave out column headers
        index = sheet.cell(r, columns.index(attrib)).value
        if index:
            index = str(int(index))
        if index != "":  # Dont include rows without meaningful index
            t = datetime.fromtimestamp(
                self.mtime, timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            doc = ET.SubElement(root,
                                record_tag,
                                attrib={
                                    attrib: index,
                                    "exportdatum": str(t)
                                })
            print("INDEX: %s" % index)  # should this become verbose?
            row_dict = {}
            for c in range(sheet.ncols):
                cell = sheet.cell(r, c)
                cellTypeStr = ctype_text.get(cell.ctype, "unknown type")
                tag = sheet.cell(0, c).value
                # I want lowercase initial for all element names
                tag = tag[0].lower() + tag[1:]
                # xml spec: strip illegal chars for elements
                tag = re.sub(r"\W|&|<|>|:", "", tag)
                if re.search(r"\A[0-9]", tag):
                    raise ValueError(
                        "XML spec doesn't allow elements to begin with numbers"
                    )
                # type conversions
                if cellTypeStr == "number":
                    val = int(cell.value)
                elif cellTypeStr == "xldate":
                    val = xlrd.xldate.xldate_as_datetime(cell.value, 0)
                elif cellTypeStr == "text":
                    # escape() would lead to double escape; just strip
                    # illegal xml chars
                    val = remove_re.sub("", cell.value)
                else:
                    # FIX: val was previously unbound (NameError) for
                    # bool/error/blank cells; fall back to the raw value.
                    val = cell.value
                if cellTypeStr != "empty":  # write non-empty elements
                    # rm leading and trailing whitespace; turn into str
                    val = str(val).strip()
                    if tag != attrib and val != "":
                        row_dict[tag] = val
            for tag in sorted(row_dict.keys()):
                ET.SubElement(doc, tag).text = row_dict[tag]
    # FIX: release the on-demand workbook's file handle when done.
    wb.release_resources()
    self.indent(root)
    tree.write(outfile, encoding="UTF-8", xml_declaration=True)
# Normalize CSV header titles (drop spaces/underscores, lowercase) so they
# can be matched against the workbook's header row.
csv_title = [t.replace(' ', '').replace('_', '').lower() for t in csvfile[0]]
# get xlsx no indent title list
wb = load_workbook('whatever.xlsx')
sheet = wb.active
max_col = sheet.max_column
# FIX: max_column is a 1-based inclusive bound, so the range must run to
# max_col + 1 -- range(1, max_col) silently dropped the last column.
xlsx_tag = [
    sheet.cell(1, i).value.replace(' ', '').replace('_', '').lower()
    for i in range(1, max_col + 1)
]
# get the compatitable: map each shared title to its (csv index, xlsx index)
compat_dct = {}
compat_list = []
for title in csv_title:
    if title in xlsx_tag:
        ci = csv_title.index(title)
        xi = xlsx_tag.index(title)
        compat_dct[title] = [ci, xi]
        compat_list.append([ci, xi])
print(csv_title)
print(xlsx_tag)
print(compat_dct)
# get data from csv by column
# write it into xlsx by column
def excel_view(request):
    """Import animal records from 'Data set.xlsx' into the database.

    Reads the rectangular region anchored at B3, skipping a fixed set of
    columns, and creates one Animal per row (author is the admin user,
    id=1).  NOTE(review): the row/column ranges below are exclusive of
    lastCell, so row 245 and column BC are never read -- confirm whether
    the B3:BC245 span was meant to be inclusive.
    """
    # Absolute column indices that are excluded from the imported row.
    skip_cols = {26, 27, 29, 30, 32, 33, 34, 35, 36}
    xlsx_file = Path('Data set.xlsx')
    wb_obj = openpyxl.load_workbook(xlsx_file)
    sheet = wb_obj.active
    firstCell = sheet['B3']
    lastCell = sheet['BC245']
    mas = []
    for j in range(firstCell.row, lastCell.row):
        mos = []
        for i in range(firstCell.column, lastCell.column):
            # FIX: replaced the `if i == 26 or ... : i = i  else: ...`
            # no-op anti-pattern with a set-membership guard.
            if i not in skip_cols:
                mos.append(sheet.cell(row=j, column=i).value)
        mas.append(mos)
    # user id=1 - admin
    for row in mas:
        animal = Animal(author=User.objects.get(id=1),
                        idcard_registration_animal=row[0],
                        type_animal=row[1],
                        age_animal=row[2],
                        weight_animal=row[3],
                        name_animal=row[4],
                        gender_animal=row[5],
                        breed_animal=row[6],
                        color_animal=row[7],
                        hair_animal=row[8],
                        ears_animal=row[9],
                        tail_animal=row[10],
                        size_animal=row[11],
                        feature_animal=row[12],
                        avairy_animal=row[13],
                        identification_mark_animal=row[14],
                        sterilization_date_animal=row[15],
                        veterinarian_name_animal=row[16],
                        socialized_animal=row[17],
                        receipt_report_animal=row[18],
                        date_receipt_report_animal=row[19],
                        administrative_district_animal=row[20],
                        catch_report_animal=row[21],
                        catching_address_animal=row[22],
                        legal_entity_animal=row[23],
                        guardians_animal=row[24],
                        natural_person_animal=row[25],
                        date_admission_toshelter_animal=row[26],
                        act_animal=row[27],
                        date_leaving_shelter_animal=row[28],
                        reason_leaving_animal=row[29],
                        contract_leaving_animal=row[30],
                        shelter_address_animal=row[31],
                        exploit_organization_animal=row[32],
                        head_ofshelter_animal=row[33],
                        care_worker_animal=row[34],
                        item_no_treatment_animal=row[35],
                        date_treatment_parasite_animal=row[36],
                        drug_name_animal=row[37],
                        drug_dosage_animal=row[38],
                        item_no_vaccine_animal=row[39],
                        date_vaccine_animal=row[40],
                        type_vaccine_animal=row[41],
                        num_serial_animal=row[42],
                        date_inspection_animal=row[43])
        animal.save()
        print(animal)