示例#1
0
    def analysisSheet2List(self, sheet: sheet.Sheet, fieldlist: list):
        "解析sheet"
        if sheet.nrows == 0:
            return []
        mergeDic, lastrow = self.mergedCell(sheet)
        firstrow = sheet.row_values(lastrow)
        head = {}
        for i in range(len(firstrow)):
            cell = sheet.cell(lastrow, i)
            if (lastrow, i) in mergeDic:
                cell = sheet.cell(*mergeDic[(lastrow, i)])
            val = self.cell2Str(cell)
            if val in fieldlist:
                head[val] = i

        #logger.info(head)

        sheetList = []
        for i in range(lastrow + 1, sheet.nrows):
            rowdic = self.line2Dic(sheet, i, head)
            if rowdic:
                sheetList.append(rowdic)

        #logger.info(sheetList)
        return sheetList
示例#2
0
 def line2Dic(self, sheet: sheet.Sheet, row, headMap: dict):
     "行转字典"
     retdic = {}
     empty = True
     for key in headMap:
         if headMap[key] == None or headMap[key] < 0:
             continue
         sval = self.cell2Str(sheet.cell(row, headMap[key]))
         if sval and empty:
             empty = False
         retdic[key] = self.transF2H(sval)
     if empty:
         return None
     else:
         return retdic
示例#3
0
def buildNeihboorhoodProfiles():
    workbook = xlrd.open_workbook(inpfile)
    print(inpfile)
    neighboorlist = []
    for sheet in workbook.sheets():
        idx = 0
        r, c, field, idx = getNeighboorhoodField(idx)
        neighboor_fields = {}
        neighboor_fields['neighboorhood'] = sheet.name
        for row in range(sheet.nrows):
            for col in range(sheet.ncols):
                if row == r and col == c:
                    neighboor_fields[field] = sheet.cell(r, c).value
                    r, c, field, idx = getNeighboorhoodField(idx)
                    if r < 0:
                        neighboorlist.append(neighboor_fields)
                        break
    return neighboorlist
示例#4
0
def handle_date(path, pro_name, pro_type, pro_num):
    print('解析 excel:{} 开始......'.format(path))
    data_arr = []
    # excel_path = r'C:/Users/yamei/Desktop/原材料/付款/A280/A280 V3.1 30K付款计划20190522.xlsx'
    excel_path = r'' + str(path)
    book = xlrd.open_workbook(excel_path)
    sheet = book.sheet_by_index(0)
    rows = sheet.nrows
    cols = sheet.ncols
    print('sheet rows,cols:', rows, cols)

    for i in range(2, sheet.nrows):
        first_cell = sheet.cell(i, 0)
        # 第一行非数字,则说明此行数据是不重要的
        if not is_number(first_cell.value) and first_cell.value != '':
            continue
        data = {
            '产品名称': pro_name, '型号': pro_type, '台数': pro_num, '公司名称': '',
            '结算方式': '', '预计交货时间': ''
        }
        data['公司名称'] = sheet.row_values(i-1)[1] if sheet.row_values(i)[1] == '' else sheet.row_values(i)[1]
        data['结算方式'] = sheet.row_values(i)[4]

        sub_time = sheet.cell(i, 7)
        if sub_time.ctype == 2 or sub_time.ctype == 3:
            sub_time_value = xldate_as_datetime(sub_time.value, 0).strftime('%Y-%m-%d')
            data['预计交货时间'] = sub_time_value
        else:
            data['预计交货时间'] = sheet.row_values(i)[7]
        for col in range(8, cols):
            # print('row, cols is:', i, col)
            # 空白列,跳过
            if sheet.cell(1, col).value == '':
                continue
            time_result = time_formate(sheet.cell(1, col))
            if time_result is None:
                # data[sheet.row_values(1)[col]] = sheet.row_values(i)[col]
                # 供应商主营业务,备注列 丢弃
                continue
            else:
                # 时间字符串在4位,6位时间字符串则表示该列的值类型为时间类型
                if col > 8 and time_result.__len__() <= 4:
                    before_time = time_formate(sheet.cell(1, col - 1))
                    # 时间列为递增序列,若果出现后一个时间比前一个时间小,则必然是过了一个年份
                    if not before_time is None and int(before_time) < int(time_result):
                        print(time_result + '转换为: 2019' + time_result)
                        time_result = '2019' + time_result
                    else:
                        if not before_time is None and int(before_time) > int(time_result):
                            print(time_result + '转换为: 2020' + time_result)
                            time_result = '2020' + time_result
                else:
                    # 第一列时间
                    if time_result.__len__() <= 4:
                        print(time_result + '转换为: 2019' + time_result)
                        time_result = '2019' + time_result
                data[time_result] = sheet.row_values(i)[col]
        data_arr.append(data)
        print(data)
    print('size:', data_arr.__len__(), data_arr)
    print('解析 excel:{} 结束......'.format(path))
    return data_arr
示例#5
0
def create_outputfile(prxdoc, inputfiles_element, inputfilehref,
                      nominal_outputfilehref, outputfilehref, outputdict,
                      ignore_locking):
    """Create the output XML file from the raw input by running any filters, etc. 
    It will be presumed that the output XML file will eventually be referred to by nominal_outputfilehref, 
    but the actual file written will be outputfilehref"""

    # print("inputfilehref=%s" % (inputfilehref.humanurl()))
    if inputfilehref.get_bare_unquoted_filename().lower().endswith(
            ".xls") or inputfilehref.get_bare_unquoted_filename().lower(
            ).endswith(".xlsx"):
        try:
            import xlrd
            import xlrd.sheet

            inputfileelement = outputdict[inputfilehref].inputfileelement
            # Any dc: namespace elements within the inputfileelement
            # will get placed in a dc:summary tag

            timestamp = datetime.datetime.fromtimestamp(
                os.path.getmtime(inputfilehref.getpath()),
                lm_timestamp.UTC()).isoformat()
            spreadsheet = xlrd.open_workbook(inputfilehref.getpath())
            sheetname = prxdoc.getattr(inputfileelement, "sheetname",
                                       spreadsheet.sheet_names()[0])

            sheet = spreadsheet.sheet_by_name(sheetname)
            titlerow = int(prxdoc.getattr(inputfileelement, "titlerow",
                                          "1")) - 1

            # titlerow=sheet.row(titlerownum)
            nrows = sheet.nrows
            ncols = sheet.ncols

            rawtitles = [
                str(sheet.cell(titlerow, col).value).strip()
                for col in range(ncols)
            ]
            tagnames = [
                convert_to_tagname(splitunits(rawtitle)[0])
                if rawtitle is not None and len(rawtitle) > 0 else "blank"
                for rawtitle in rawtitles
            ]
            unitnames = [
                convert_to_tagname(splitunits(rawtitle)[1])
                if rawtitle is not None and len(rawtitle) > 0 else None
                for rawtitle in rawtitles
            ]

            nsmap = copy.deepcopy(prx_nsmap)
            nsmap["ls"] = "http://limatix.org/spreadsheet"

            outdoc = xmldoc.xmldoc.newdoc("ls:sheet",
                                          nsmap=nsmap,
                                          contexthref=outputfilehref)

            # Copy dc: namespace elements within inputfileelement
            # into a dc:summary tag
            inputfileel_children = prxdoc.children(inputfileelement)
            summarydoc = None
            for inputfileel_child in inputfileel_children:
                if prxdoc.gettag(inputfileel_child).startswith("dc:"):
                    if summarydoc is None:
                        summarydoc = xmldoc.xmldoc.newdoc(
                            "dc:summary",
                            nsmap=nsmap,
                            contexthref=prxdoc.getcontexthref())
                        pass
                    # place in document with same context as where it came from
                    summarydoc.getroot().append(
                        copy.deepcopy(inputfileel_child))
                    pass
                pass
            if summarydoc is not None:
                # shift summary context and then copy it into outdoc
                summarydoc.setcontexthref(outdoc.getcontexthref())
                outdoc.getroot().append(copy.deepcopy(summarydoc.getroot()))
                pass

            # Copy spreadsheet table
            for row in range(titlerow + 1, nrows):
                rowel = outdoc.addelement(outdoc.getroot(), "ls:row")
                rownumel = outdoc.addelement(rowel, "ls:rownum")
                outdoc.settext(rownumel, str(row))
                for col in range(ncols):
                    cell = sheet.cell(row, col)
                    cell_type = xlrd.sheet.ctype_text.get(
                        cell.ctype, 'unknown')
                    if cell_type == "empty":
                        continue

                    cellel = outdoc.addelement(rowel, "ls:" + tagnames[col])
                    outdoc.setattr(cellel, "ls:celltype", cell_type)
                    hyperlink = sheet.hyperlink_map.get((row, col))
                    if cell_type == "text" and hyperlink is None:
                        outdoc.settext(cellel, cell.value)
                        pass
                    elif cell_type == "text" and hyperlink is not None:
                        # Do we need to do some kind of conversion on
                        # hyperlink.url_or_path()
                        outdoc.settext(cellel, cell.value)
                        hyperlink_href = dcv.hrefvalue(
                            hyperlink.url_or_path, contexthref=inputfilehref)
                        hyperlink_href.xmlrepr(outdoc, cellel)
                        pass
                    elif cell_type == "number":
                        if unitnames[col] is not None:
                            outdoc.setattr(cellel, "dcv:units", unitnames[col])
                            pass
                        outdoc.settext(cellel, str(cell.value))
                        pass
                    elif cell_type == "xldate":
                        outdoc.settext(
                            cellel,
                            datetime.datetime(
                                xlrd.xldate_as_tuple(
                                    cell.value,
                                    spreadsheet.datemode)).isoformat())
                        pass
                    elif cell_type == "bool":
                        outdoc.settext(cellel, str(bool(cell.value)))
                        pass
                    elif cell_type == "error":
                        outdoc.settext(cellel, "ERROR %d" % (cell.value))
                        pass
                    else:
                        raise ValueError("Unknown cell type %s" % (cell_type))

                    pass
                pass

            # Did the user provide a prx:xslt href indicating
            # a transformation to apply?
            xslttag = prxdoc.xpathsinglecontext(
                outputdict[inputfilehref].inputfileelement,
                "prx:xslt",
                default=None)
            if xslttag is not None:
                # Replace outdoc with transformed copy
                outdoc = create_outputfile_process_xslt(
                    prxdoc, xslttag, inputfiles_element,
                    outputdict[inputfilehref].inputfileelement, outdoc)
                pass

            # Write out under new file name outputfilehref
            assert (outputfilehref != inputfilehref)
            outdoc.set_href(outputfilehref, readonly=False)
            outdoc.close()
            canonhash = None  # could hash entire input file...
            pass
        except ImportError:

            raise (ImportError(
                "Need to install xlrd package in order to import .xls or .xlsx files"
            ))

        pass
    elif inputfilehref.has_fragment():
        # input file url has a fragment... we're only supposed
        # to extract a portion of the file

        timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(inputfilehref.getpath()),
            lm_timestamp.UTC()).isoformat()

        if inputfilehref.fragless() == prxdoc.get_filehref():
            inputfilecontent = prxdoc  # special case where input file is .prx file
            pass
        else:
            inputfilecontent = xmldoc.xmldoc.loadfile(inputfilehref.getpath())
            pass

        inputfileportion = inputfilehref.evaluate_fragment(inputfilecontent)
        if len(inputfileportion) == 0:
            raise ValueError(
                "Input URL %s fragment reference failed to resolve" %
                (inputfilehref.humanurl()))
        elif len(inputfileportion) > 1:
            raise ValueError(
                "Input URL %s fragment reference resolved to multiple elements"
                % (inputfilehref.humanurl()))

        #print("inputfilehref=%s" % (inputfilehref.humanurl()))
        #print("inputfileportion=%s" % (etree.tostring(inputfileportion[0])))
        #import pdb as pythondb
        #pythondb.set_trace()
        outdoc = xmldoc.xmldoc.copy_from_element(
            inputfilecontent, inputfileportion[0], nsmap=prx_nsmap
        )  # NOTE: prx_nsmap doesn't make much difference here because the nsmap of the element is copied in. prx_nsmap just makes our prefixes available through xmldoc

        # Create canonicalization from unmodified outdoc so that we can hash it
        outdoc_canon = BytesIO()
        outdoc.doc.write_c14n(outdoc_canon,
                              exclusive=False,
                              with_comments=True)
        canonhash = hashlib.sha256(outdoc_canon.getvalue()).hexdigest()

        if inputfileportion[0] is outputdict[inputfilehref].inputfileelement:
            # special case where this input file href with fragment
            # points to its very tag -- the <inputfiles> tag in the prxfile
            # auto-populate corresponding <outputfile> tags

            # i.e. modify outdoc to make sure there is an <outputfile> tag with an xlink:href
            # for each inputfile

            assert (inputfilecontent.gettag(
                inputfileportion[0]) == "prx:inputfiles")

            outdoc_inputfiletags = [
                outdoc.getroot()
            ]  # treat the root <inputfiles> tag as an inputfile
            outdoc_inputfiletags.extend(outdoc.xpath("prx:inputfile"))

            for outdoc_inputfiletag in outdoc_inputfiletags:
                if outdoc_inputfiletag is outdoc.getroot(
                ) and not outdoc.hasattr(outdoc_inputfiletag, "xlink:href"):
                    # root prx:inputfiles tag has no xlink:href
                    assert (
                        outdoc.gettag(outdoc_inputfiletag) == "prx:inputfiles")
                    outdoc_inputfilehref = inputfilehref  # subsegment of input file
                    pass

                elif outdoc.hasattr(
                        outdoc_inputfiletag, "xlink:href"
                ) and outdoc_inputfiletag is not outdoc.getroot():
                    outdoc_inputfilehref = dcv.hrefvalue.fromxml(
                        outdoc, outdoc_inputfiletag)  # specified input file
                    pass
                else:
                    raise ValueError(
                        "Bad <prx:inputfiles> or <prx:inputfile> tag at %s" %
                        (dcv.hrefvalue.fromelement(
                            outdoc, outdoc_inputfiletag).humanurl()))

                #print("outdoc_inputfilehref:")
                #print(outdoc_inputfilehref)
                #print("outputdict keys:")
                #print(outputdict.keys())

                assert (
                    outdoc_inputfilehref in outputdict
                )  # all of these input file references should be keys to the output dict because outputdict was made from the originals!

                # Find or create prx:outputfile tag
                outdoc_outputfiletag = outdoc.child(outdoc_inputfiletag,
                                                    "prx:outputfile")
                if outdoc_outputfiletag is None:
                    outdoc_outputfiletag = outdoc.addelement(
                        outdoc_inputfiletag, "prx:outputfile")
                    pass

                # Ensure prx:outputfile tag has a hyperlink
                if not outdoc.hasattr(outdoc_outputfiletag, "xlink:href"):
                    outputdict[outdoc_inputfilehref].outputfilehref.xmlrepr(
                        outdoc, outdoc_outputfiletag)
                    pass

                pass

            pass

        # Did the user provide a prx:xslt href indicating
        # a transformation to apply?
        xslttag = prxdoc.xpathsinglecontext(
            outputdict[inputfilehref].inputfileelement,
            "prx:xslt",
            default=None)
        if xslttag is not None:
            outdoc = create_outputfile_process_xslt(
                prxdoc, xslttag, inputfiles_element,
                outputdict[inputfilehref].inputfileelement, outdoc)
            pass

        # Write out selected portion under new file name outputfilehref
        assert (outputfilehref != inputfilehref)
        outdoc.set_href(outputfilehref, readonly=False)
        outdoc.close()

        pass
    else:
        # input file url has no fragment, not .xls or .xlsx: treat it as XML
        # extract the whole thing!

        # Do we have an input filter? ... stored as xlink:href in <inputfilter> tag
        canonhash = None  # (we could hash the entire inputfile!)
        inputfilters = prxdoc.xpathcontext(
            outputdict[inputfilehref].inputfileelement, "prx:inputfilter")

        if len(inputfilters) > 1:
            raise ValueError(
                "Maximum of one <inputfilter> element permitted in .prx file")
        timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(inputfilehref.getpath()),
            lm_timestamp.UTC()).isoformat()

        xslttag = prxdoc.xpathsinglecontext(
            outputdict[inputfilehref].inputfileelement,
            "prx:xslt",
            default=None)

        if len(inputfilters) > 0:
            # have an input filter
            inputfilter = inputfilters[0]

            # run input filter
            # Get path from xlink:href
            #inputfilterpath=prxdoc.get_href_fullpath(inputfilter)
            inputfilterhref = dcv.hrefvalue.fromxml(prxdoc, inputfilter)
            inputfilterpath = inputfilterhref.getpath()
            # build arguments
            inputfilterargs = [inputfilterpath]

            # pull attributes named param1, param2, etc. from inputfilter tag
            cnt = 1
            while "param" + cnt in inputfilter.attrib:
                inputfilterargs.append(inputfilter.attrib["param" + cnt])
                cnt += 1
                pass

            # add input and output filenames as params to filter
            inputfilterargs.append(inputfilehref.getpath())
            inputfilterargs.append(outputfilehref.getpath())

            # Call input filter... will raise
            # exception if input filter fails.
            subprocess.check_call(*inputfilterargs)

            pass
        elif xslttag is not None:
            indoc = xmldoc.xmldoc.loadhref(inputfilehref,
                                           nsmap=prx_nsmap,
                                           readonly=True)
            outdoc = create_outputfile_process_xslt(
                prxdoc, xslttag, inputfiles_element,
                outputdict[inputfilehref].inputfileelement, indoc)

            # Write out under new file name outputfilehref
            assert (outputfilehref != inputfilehref)
            outdoc.set_href(outputfilehref, readonly=False)
            outdoc.close()

            pass
        else:
            # use shutil to copy input to output
            shutil.copyfile(inputfilehref.getpath(), outputfilehref.getpath())
            pass
        pass
    return (canonhash, timestamp)
示例#6
0
    def transPerFile(self, infile, outfile):
        """Called on a per file basis from transformAll"""

        self.mtime = os.path.getmtime(infile)
        wb = xlrd.open_workbook(filename=infile, on_demand=True)
        sheet = wb.sheet_by_index(0)

        root = ET.Element(
            "museumPlusExport",
            attrib={
                "version": "2.0",
                "level": "dirty",
            },
        )
        tree = ET.ElementTree(root)

        columns = [sheet.cell(0, c).value for c in range(sheet.ncols)]

        base = os.path.basename(infile)

        # print ("%s -> %s" % (infile, tag))
        # invalid xml characters: will be stripped
        remove_re = re.compile(u"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")

        for r in range(1, sheet.nrows):  # leave out column headers
            if re.match("so", base, flags=re.I):
                tag = "sammlungsobjekt"
                attrib = "objId"

            elif re.match("pk", base, flags=re.I):
                tag = "personKörperschaft"
                attrib = "kueId"

            elif re.match("mm", base, flags=re.I):
                tag = "multimediaobjekt"
                attrib = "mulId"

            elif re.match("aus", base, flags=re.I):
                tag = "ausstellung"
                attrib = "ausId"
            else:
                print("Error: Unknown file %s" % infile)
                sys.exit(1)

            index = sheet.cell(r, columns.index(attrib)).value
            if index:
                index = str(int(index))

            if index != "":  # Dont include rows without meaningful index
                t = datetime.fromtimestamp(
                    self.mtime, timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
                # print ('AAAAAAAA'+str(t))
                doc = ET.SubElement(root,
                                    tag,
                                    attrib={
                                        attrib: index,
                                        "exportdatum": str(t)
                                    })
                print("INDEX: %s" % index)  # should this become verbose?

                row_dict = {}

                for c in range(sheet.ncols):
                    cell = sheet.cell(r, c)
                    cellTypeStr = ctype_text.get(cell.ctype, "unknown type")
                    tag = sheet.cell(0, c).value
                    tag = (tag[0].lower() + tag[1:]
                           )  # I want lowercase initial for all element names

                    tag = re.sub(
                        r"\W|&|<|>|:", "",
                        tag)  # xml spec: strip illegal chars for elements
                    if re.search(r"\A[0-9]", tag):
                        raise ValueError(
                            "XML spec doesn't allow elements to begin with numbers"
                        )
                    # type conversions
                    if cellTypeStr == "number":
                        # val=int(float(cell.value))
                        val = int(cell.value)
                        # print ("number:%s" % val)

                    elif cellTypeStr == "xldate":
                        val = xlrd.xldate.xldate_as_datetime(cell.value, 0)
                        # print ("XLDATE %s" % (val))

                    elif cellTypeStr == "text":
                        # val=escape() leads to double escape
                        val = remove_re.sub("",
                                            cell.value)  # rm illegal xml char
                        # print ("---------TypeError %s" % cellTypeStr)

                    if cellTypeStr != "empty":  # write non-empty elements
                        # print ("%s:%s" % (attrib, tag))
                        val = str(val).strip(
                        )  # rm leading and trailing whitespace; turn into str
                        if tag != attrib and val != "":
                            # print ( '%s: %s (%s)' % (tag, val, cellTypeStr))
                            row_dict[tag] = val

                for tag in sorted(row_dict.keys()):
                    ET.SubElement(doc, tag).text = row_dict[tag]

        self.indent(root)

        # print ('%s->%s' % (inpath, outfile))
        tree.write(outfile, encoding="UTF-8", xml_declaration=True)
示例#7
0
csv_title = []

for t in csvfile[0]:
    csv_title.append(t.replace(' ', '').replace('_', '').lower())

# get xlsx no indent title list
wb = load_workbook('whatever.xlsx')
sheet = wb.active
max_col = sheet.max_column

xlsx_tag = []

for i in range(1, max_col):
    xlsx_tag.append(
        sheet.cell(1, i).value.replace(' ', '').replace('_', '').lower())

# get the compatitable
compat_dct = {}
compat_list = []
for i in csv_title:
    if i in xlsx_tag:
        compat_dct[i] = [csv_title.index(i), xlsx_tag.index(i)]
        compat_list.append([csv_title.index(i), xlsx_tag.index(i)])

print(csv_title)
print(xlsx_tag)

print(compat_dct)
# get data from csv by column
# write it into xlsx by column
示例#8
0
def excel_view(request):
    xlsx_file = Path('Data set.xlsx')
    wb_obj = openpyxl.load_workbook(xlsx_file)
    sheet = wb_obj.active

    firstCell = sheet['B3']
    lastCell = sheet['BC245']
    mas = []
    for j in range(firstCell.row, lastCell.row):
        mos = []
        for i in range(firstCell.column, lastCell.column):
            if i == 26 or i == 27 or i == 29 or i == 30 or i == 32 or i == 33 or i == 34 or i == 35 or i == 36:
                i = i
            else:
                c = sheet.cell(row=j, column=i).value
                mos.append(c)
        mas.append(mos)
    # user id=1 - admin
    for i in mas:
        # head_ofshelter_animal = '' - имя пользователя

        animal = Animal(author=User.objects.get(id=1),
                        idcard_registration_animal=i[0],
                        type_animal=i[1],
                        age_animal=i[2],
                        weight_animal=i[3],
                        name_animal=i[4],
                        gender_animal=i[5],
                        breed_animal=i[6],
                        color_animal=i[7],
                        hair_animal=i[8],
                        ears_animal=i[9],
                        tail_animal=i[10],
                        size_animal=i[11],
                        feature_animal=i[12],
                        avairy_animal=i[13],
                        identification_mark_animal=i[14],
                        sterilization_date_animal=i[15],
                        veterinarian_name_animal=i[16],
                        socialized_animal=i[17],
                        receipt_report_animal=i[18],
                        date_receipt_report_animal=i[19],
                        administrative_district_animal=i[20],
                        catch_report_animal=i[21],
                        catching_address_animal=i[22],
                        legal_entity_animal=i[23],
                        guardians_animal=i[24],
                        natural_person_animal=i[25],
                        date_admission_toshelter_animal=i[26],
                        act_animal=i[27],
                        date_leaving_shelter_animal=i[28],
                        reason_leaving_animal=i[29],
                        contract_leaving_animal=i[30],
                        shelter_address_animal=i[31],
                        exploit_organization_animal=i[32],
                        head_ofshelter_animal=i[33],
                        care_worker_animal=i[34],
                        item_no_treatment_animal=i[35],
                        date_treatment_parasite_animal=i[36],
                        drug_name_animal=i[37],
                        drug_dosage_animal=i[38],
                        item_no_vaccine_animal=i[39],
                        date_vaccine_animal=i[40],
                        type_vaccine_animal=i[41],
                        num_serial_animal=i[42],
                        date_inspection_animal=i[43])
        animal.save()
        print(animal)