def generate_shex_from_csv(filepath, delim=",", skip_header=False):
        """
        Generate ShEx from an application profile CSV file.

        The file at ``filepath`` is read with the ``csv`` module. Every row
        must contain exactly eight values; rows with any other number of
        values are silently ignored. The columns are used as follows
        (0-based):

          0 : shape name. When empty, the shape name of the most recent row
              that supplied one is reused.
          1 : property
          2 : not used by this function
          3 : "yes" (case-insensitive) if the property is mandatory
          4 : "yes" (case-insensitive) if the property is repeatable
          5 : value, used unless columns 1 and 6 are both filled
          6 : type. When columns 1 and 6 are both filled, the value becomes a
              reference to a generated shape "@<type>type", and a statement
              "@<type>type|rdf:type|<type>" is appended once per distinct
              type.
          7 : optional comment, appended as "|#<comment>"

        Cardinality derived from columns 3 and 4:
        mandatory + repeatable -> "+", mandatory only -> "1",
        repeatable only -> "*", neither -> "0,1".

        The resulting '|' separated statements are handed to
        ``CSV.generate_shex_from_data_string``.

        Parameters
        ----------
          filepath : str
            Path of the CSV file to read.
          delim : str
            Delimiter used in the CSV file, e.g. ',', '|' or ';'.
          skip_header : bool
            If the first line is a header, set this value to True. By default, the value is False.

        Returns
        -------
          shex
            shape expression; an empty string if anything goes wrong (the
            error is printed, not raised).

        """
        shexstatement = ""
        try:
            lines = []
            # A dict serves as an insertion-ordered set, so the generated type
            # shapes are emitted in first-seen order on every run (a plain
            # set would make the output order depend on hash randomisation).
            typelines = {}
            shapename = ""
            # newline="" is what the csv module requires so that it can
            # handle line endings (and newlines inside quoted fields) itself.
            with open(filepath, "r", newline="") as csvfile:
                csvreader = csv.reader(csvfile, delimiter=delim)
                for rowno, row in enumerate(csvreader, start=1):
                    if skip_header and rowno == 1:
                        continue
                    # Ignore lines with incorrect number of values
                    if len(row) != 8:
                        continue
                    if row[0]:
                        shapename = "@" + row[0]
                    if row[6] and row[1]:
                        typeshape = "@" + row[6] + "type"
                        typelines[typeshape + "|rdf:type|" + row[6] + "\n"] = None
                        value = typeshape
                    else:
                        value = row[5]
                    line = shapename + "|" + row[1] + "|" + value
                    # Tolerate surrounding whitespace in the yes/no columns.
                    mand = row[3].strip().lower() == "yes"
                    repeat = row[4].strip().lower() == "yes"
                    if mand:
                        line += "|+" if repeat else "|1"
                    else:
                        line += "|*" if repeat else "|0,1"
                    if row[7]:
                        line += "|#" + row[7]
                    lines.append(line + "\n")
            data = "".join(lines)
            if typelines:
                data += "".join(typelines) + "\n"
            shexstatement = CSV.generate_shex_from_data_string(data)
        except Exception as e:
            # Best-effort API: report the problem and return an empty string.
            print("Unable to parse. Error: " + str(e))
        return shexstatement
# Example #2
# 0
    def generate_shex_from_spreadsheet(filepath,
                                       skip_header=False,
                                       stream=None):
        """
        Generate ShEx from a spreadsheet file.

        Every row of every sheet is turned into one line of '|' separated
        values (empty cells are dropped), and the resulting text is handed to
        ``CSV.generate_shex_from_data_string``. The file type is chosen from
        the extension of ``filepath`` (case-insensitive):
        .xlsx/.xlsm/.xltx/.xltm, .xls or .ods. For any other extension no
        rows are read and empty data is passed on.

        Parameters
        ----------
          filepath : str
            This parameter contains path of a Spreadsheet file. When
            ``stream`` is given, it is only used to determine the file type.
          skip_header : bool
            if the first line is a header, set this value to True. By default, the value is False.
            Only the very first row read (first row of the first sheet) is
            skipped.
          stream : bytes, optional
            Raw content of the spreadsheet. When given, it is read instead of
            the file at ``filepath``.

        Returns
        -------
          shex
            shape expression; an empty string if anything goes wrong (the
            error is printed, not raised).

        """
        import tempfile  # only needed when `stream` has to be put on disk

        shexstatement = ""
        try:
            file_extension = splitext(filepath)[1].lower()
            openxml_extensions = {".xlsx", ".xlsm", ".xltx", ".xltm"}
            lines = []
            tmp_path = None
            try:
                # These formats are loaded from a path below, so in-memory
                # content is first written to a temporary file. The file is
                # created in the system temp directory (not "tmp" + filepath,
                # which breaks for paths containing a directory) and is
                # removed in the `finally` even when loading fails.
                if stream is not None and (file_extension in openxml_extensions
                                           or file_extension == ".ods"):
                    with tempfile.NamedTemporaryFile(suffix=file_extension,
                                                     delete=False) as sf:
                        tmp_path = sf.name
                        sf.write(stream)
                    filepath = tmp_path

                if file_extension in openxml_extensions:
                    wb = load_workbook(filepath)
                    for ws in wb.worksheets:
                        for i in range(1, ws.max_row + 1):
                            values = []
                            for j in range(1, ws.max_column + 1):
                                cell = ws.cell(row=i, column=j).value
                                if cell is not None:
                                    # Cells may hold numbers or dates;
                                    # str.join only accepts strings.
                                    values.append(str(cell))
                            lines.append("|".join(values))

                elif file_extension == ".xls":
                    if stream is not None:
                        wb = open_workbook(file_contents=stream)
                    else:
                        wb = open_workbook(filepath)
                    for sheet in wb.sheets():
                        # Use each sheet's own dimensions, not the first
                        # sheet's, so that no rows are missed and no
                        # out-of-range cells are requested.
                        for i in range(sheet.nrows):
                            values = []
                            for j in range(sheet.ncols):
                                text = str(sheet.cell(i, j).value)
                                if text:
                                    values.append(text)
                            lines.append("|".join(values))

                elif file_extension == ".ods":
                    doc = load(filepath).spreadsheet
                    for row in doc.getElementsByType(TableRow):
                        values = []
                        for cell in row.getElementsByType(TableCell):
                            text = str(cell)
                            if text:
                                values.append(text)
                        lines.append("|".join(values))
            finally:
                if tmp_path is not None:
                    remove(tmp_path)

            if skip_header:
                lines = lines[1:]
            data = "".join(line + "\n" for line in lines)
            shexstatement = CSV.generate_shex_from_data_string(data)
        except Exception as e:
            # Best-effort API: report the problem and return an empty string.
            print("Unable to read file. Error: " + str(e))
        return shexstatement