示例#1
0
def get_string_from_excel_cell(_xl_sheet, _row_index, _column_index):
    """Return the stripped text of one sheet cell, or "" for non-text cells.

    The cell at (``_row_index``, ``_column_index``) is fetched from
    ``_xl_sheet`` and its ``ctype`` is translated through ``ctype_text``.
    Only a cell whose type name is "text" yields its value (with surrounding
    whitespace removed); every other type yields the empty string.
    """
    target = _xl_sheet.cell(_row_index, _column_index)
    is_text = ctype_text.get(target.ctype, "unknown type") == "text"
    return target.value.strip() if is_text else ""
示例#2
0
def write_table(s,row_offset,col1,num_cols,refs,ref_col,wr):
    """Write one table found on sheet ``s`` through ``wr`` and return ``refs``.

    The header row is located with ``get_rowheader`` and the first table
    column with ``get_col_index``; ``num_cols`` columns are emitted starting
    there. Two modes are selected by ``ref_col``:

    * ``ref_col != "NULL"``: the table has a referenced column. Its name and
      then the value of that column for every written data row are appended
      to ``refs``.
    * ``ref_col == "NULL"``: every written row (header included) is prefixed
      with ``refs[ref_row]``. ``ref_row`` starts at 0 and advances each time
      an empty first-column cell follows a non-empty one.

    Rows whose first-column cell is empty are never written. ``refs`` is
    mutated in place and also returned. Composed keys are not handled.
    """
    header_row = get_rowheader(s,row_offset)
    first_col = get_col_index(s,header_row,col1)
    is_child = ref_col == "NULL"

    # Only a table with a referenced column needs that column's index.
    if not is_child:
        ref_index = get_col_index(s,header_row,ref_col)
        refs.append(ref_col)

    def kind_of(r):
        # Type name of the first table column's cell on row ``r``.
        return ctype_text.get(s.cell(r,first_col).ctype, 'unknown type')

    ref_row = 0
    for rowid in range(header_row,s.nrows):
        if rowid == header_row:
            # Header fields go through format_header before being written.
            head = [format_header(refs[ref_row])] if is_child else []
            for colid in range(first_col,(first_col+num_cols)):
                head.append(format_header(s.cell(rowid,colid).value))
            wr.writerow(head)
            continue

        kind = kind_of(rowid)
        if kind == 'empty':
            # A non-empty -> empty transition moves on to the next reference.
            if kind_of(rowid-1) != 'empty' and is_child:
                ref_row += 1
            continue

        record = [refs[ref_row]] if is_child else []
        for colid in range(first_col,(first_col+num_cols)):
            record.append(s.cell(rowid,colid).value)
        wr.writerow(record)
        if not is_child:
            refs.append(s.cell(rowid,ref_index).value)

    return refs
示例#3
0
文件: parse.py 项目: strangesast/qr
def extract_po(wb: xlrd.book.Book) -> str:
    """Return the PO string stored on the first sheet of ``wb``, if any.

    Row index 2 is probed at column indexes 9 and 10, in that order. The
    first cell that is of type "text" and whose value matches ``po_re`` is
    returned.

    ``None`` is returned (despite the ``str`` annotation) when neither cell
    qualifies. That includes sheets too small to contain the probed cells,
    where an unguarded ``sheet.cell`` call would raise ``IndexError``.
    """
    sheet = wb.sheet_by_index(0)
    if sheet.nrows <= 2:
        return None
    # try col 9, 10 -- clipped to the columns the sheet actually has
    for i in range(9, min(11, sheet.ncols)):
        cell: xlrd.sheet.Cell = sheet.cell(2, i)
        if ctype_text.get(cell.ctype) != 'text':
            continue
        po = cell.value
        if po_re.match(po):
            return po
    return None
示例#4
0
def show_column_names(xl_sheet):
    """Print the first row of ``xl_sheet`` and return its values as strings.

    A banner is printed first, then one line per cell in the form
    ``(index) value [type]`` where the type name comes from ``ctype_text``.

    :param xl_sheet: sheet object exposing ``row(0)``
    :return: list with one entry per cell of the first row; each entry is
        the cell value prefixed with a single space
    """
    rule = 60 * '-'
    # The banner needs real newlines; a literal 'n' was printed before.
    print(rule + '\n(Column #) value [type]\n' + rule)

    names = []
    for idx, cell_obj in enumerate(xl_sheet.row(0)):
        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
        # %-formatting also accepts numeric/date header cells, where plain
        # string concatenation raised TypeError.
        names.append(' %s' % (cell_obj.value,))

        print('(%s) %s [%s]' % (idx, cell_obj.value, cell_type_str,))

    return names
def getColumnNamesListFromSheet(sheet):
    """Return the values of the first row of ``sheet`` as column names.

    Every cell of the first row must be of type "text" according to
    ``ctype_text``. On the first cell that is not, an error message is
    printed and the process exits with status 1.
    """
    names = []
    for header_cell in sheet.row(0):
        if ctype_text.get(header_cell.ctype, 'unknown type') != "text":
            print("ERROR: Only 'text' data types for column names is currently supported")
            sys.exit(1)
        names.append(header_cell.value)
    return names
    def clean_data(self):
        """Build the cleaned tweet and sentiment lists from ``self.raw_data``.

        For every sheet row, column 0 is read as the tweet and column 1 as
        the sentiment. Rows are used only when the tweet cell is "text" and
        the sentiment cell is "number". The tweet is lower-cased, punctuation
        is replaced, stop words (per ``self.isStopWord``) and one-character
        words are dropped, and the remaining words are stemmed with
        ``self.stemmer`` and joined with single spaces (a trailing space is
        kept). Results are stored in ``self.cleaned_tweets_X`` (sentences)
        and ``self.cleaned_tweets_Y`` (int sentiments).
        """
        # (old, new) pairs, applied strictly in this order.
        substitutions = (
            ('. ', ' '), ('..', ' '), (';', ' '), (', ', ' '), ('!', ' '),
            ('"', ' '), ('<', ' '), ('>', ' '), ('{', ' '), ('}', ' '),
            ('[', ' '), (']', ' '), ('?', ' '), ("'", ''), (',', ' '),
            (',', ' '), ('%', ' '),
        )

        self.cleaned_tweets_X = []
        self.cleaned_tweets_Y = []
        for row_idx in range(0, self.raw_data.nrows):
            # column 0 holds the tweet, column 1 its sentiment
            tweet_cell = self.raw_data.cell(row_idx, 0)
            tweet_kind = ctype_text.get(tweet_cell.ctype, 'unknown type')
            label_cell = self.raw_data.cell(row_idx, 1)
            label_kind = ctype_text.get(label_cell.ctype, 'unknown type')

            # tweet should be text and sentiment value a number
            if tweet_kind != "text" or label_kind != "number":
                continue

            tweet = tweet_cell.value.lower()
            sentiment = int(label_cell.value)

            # remove garbage values like: .,!"<>{}[]?
            for old, new in substitutions:
                tweet = tweet.replace(old, new)

            sentence = ""
            for word in tweet.split():
                # skip anything isStopWord does not report as False
                if not (self.isStopWord(word) == False):
                    continue
                # skip two-character leftovers of tags such as </e>, <a>
                if len(word) == 2 and word[0:1] == '/':
                    continue
                # only consider words which are more than 1 character long
                if len(word) > 1:
                    stemmed = self.stemmer.stem(word).encode('utf8')
                    sentence = sentence + stemmed + ' '
            self.cleaned_tweets_X.append(sentence)
            self.cleaned_tweets_Y.append(sentiment)
示例#7
0
def spreadsheet_text_encode(f_bytes, encoding):
    """Return all text cells of a workbook joined into one string.

    The workbook is opened from the raw bytes ``f_bytes``. For every row of
    every sheet the values of the cells typed "text" are joined with single
    spaces, and the per-row strings are joined with single spaces again
    (rows without text contribute an empty string).

    ``encoding`` is accepted but not used by this function.
    """
    workbook = xlrd.open_workbook(file_contents=f_bytes)
    row_texts = []
    for sheet in workbook.sheets():
        for row in sheet.get_rows():
            words = [
                cell.value for cell in row
                if ctype_text.get(cell.ctype, 'not_text') == 'text'
            ]
            row_texts.append(" ".join(words))
    return " ".join(row_texts)
示例#8
0
def xls2pg():
    """Load the first sheet of ``XLS_FILE`` into a new PostgreSQL table.

    The workbook is opened with ``XLS_ENCODE`` as encoding override when that
    global is set. Table ``TABLE`` is created with one ``varchar`` column per
    cell of the first row; each column name is the header passed through
    ``remove_accents``, with spaces replaced by underscores and lower-cased.
    Every following row is inserted as strings through the ``PG_CONN``
    connection, which is committed at the end.

    Any exception is printed rather than raised, and ``closeConn()`` is
    always called.
    """
    from xlrd.sheet import ctype_text
    global XLS_FILE
    global XLS_ENCODE
    global PG_CONN
    print("Reading XLS")
    if XLS_ENCODE is None:
        bk = xlrd.open_workbook(XLS_FILE)
    else:
        bk = xlrd.open_workbook(XLS_FILE, encoding_override=XLS_ENCODE)
    print("Creating " + TABLE + " table")
    sheet = bk.sheet_by_index(0)

    # NOTE(review): table and column names are identifiers and cannot be sent
    # as query parameters; they still come straight from TABLE / the header
    # row and must be trusted.
    columns = []
    for cell_obj in sheet.row(0):
        col = remove_accents(cell_obj.value)
        columns.append(col.replace(" ", "_").lower() + " varchar")
    create_table = "CREATE TABLE " + TABLE + "(" + ",".join(columns) + ");"

    print("Inserting data")
    try:
        cur = PG_CONN.cursor()
        cur.execute(create_table)
        for row_idx in range(1, sheet.nrows):
            values = []
            for cell_obj in sheet.row(row_idx):
                cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
                if cell_type_str == "text":
                    values.append(unicode(cell_obj.value).encode('UTF-8'))
                else:
                    values.append(str(cell_obj.value))
            # Let the driver quote the values instead of splicing them into
            # the SQL text by hand.
            placeholders = ",".join(["%s"] * len(values))
            cur.execute(
                "INSERT INTO " + TABLE + " VALUES(" + placeholders + ");",
                values)
            if (row_idx % 100) == 0:
                # progress marker every 100 rows (trailing comma kept from
                # the original statement)
                print("."),
        print("Commiting data")
        cur.close()
        PG_CONN.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        closeConn()
示例#9
0
 def readData(self):
     """Collect the sheet's cell values column by column into ``self.data``.

     ``self.data[0]`` .. ``self.data[21]`` are reset to empty lists, then
     sheet rows 1 through 1630 are read in order. A row whose first cell is
     the empty string is skipped entirely; otherwise every cell value is
     appended to ``self.data[<column index>]``. ``self.row1`` is left
     holding the last row that was read.
     """
     for column in range(22):
         self.data[column] = []
     self.row1 = self.sheet.row(0)
     for sheet_row in range(1, 1631):
         self.row1 = self.sheet.row(sheet_row)
         for idx, cell_obj in enumerate(self.row1):
             cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
             # An empty leading cell ends processing of this row.
             if idx == 0 and cell_obj.value == "":
                 break
             self.data[idx].append(cell_obj.value)
示例#10
0
def locate_row_descriptions(xl_workbook, first_col, first_row, sheet_name,
                            data_cols):
    """Find the columns left of ``first_col`` that hold row descriptions.

    The table type is derived from ``len(data_cols)``: exactly one column is
    "long format", more than one is "wide format", none is "no data". For
    wide tables row ``first_row`` is inspected, for long tables the row above
    it. A column qualifies when its cell in that row is typed "text" or
    "xldate".

    Returns ``(columns, table_type)``. When no column qualifies, a message is
    printed and the string "Could not locate row headings" is returned in
    place of the column list.
    """
    n_data_cols = len(data_cols)
    if n_data_cols == 1:
        table_type = "long format"
    else:
        table_type = "wide format" if n_data_cols > 1 else "no data"

    sheet = xl_workbook.sheet_by_name(sheet_name)
    row_descriptions = []
    # Where is the descriptor column?
    if table_type in ("wide format", "long format"):
        probe_row = first_row if table_type == "wide format" else first_row - 1
        row_descriptions = [
            c for c in range(0, first_col)
            if ctype_text.get(sheet.row(probe_row)[c].ctype,
                              'unknown type') in ["text", "xldate"]
        ]

    if row_descriptions:
        return row_descriptions, table_type

    print("Something went wrong. Could not locate row headings")
    return "Could not locate row headings", table_type  # Possibly a pivot table
示例#11
0
def load_file(file_name):
    """Read a workbook into a ``Table`` of coloured ``Row`` objects.

    The workbook is opened with formatting information and ``get_links`` is
    called for the same file. For every sheet a fresh ``Table`` is started,
    and for every row from index 1 on a ``Row`` is filled with one cell per
    entry of ``cols_name``:

    * the last column takes ``links[row_idx + 1][11:-9]``; when that key is
      missing the cell is skipped;
    * every other column takes ``str(cell.value)``.

    Each cell's colour is derived from its background pattern colour index
    (10 red, 13 yellow, 57 green, anything else white). The table built for
    the last sheet is returned.
    """
    colour_by_index = {10: Color.RED, 13: Color.YELLOW, 57: Color.GREEN}

    book = xlrd.open_workbook(file_name, formatting_info=True)
    sheets = book.sheet_names()
    links = get_links(file_name)
    for index in range(len(sheets)):
        sheet = book.sheet_by_index(index)

        tbl = Table()
        for row_idx in range(1, sheet.nrows):
            row = Row(row_idx)
            for col_idx in range(len(cols_name)):
                cell_obj = sheet.cell(row_idx, col_idx)
                cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
                if col_idx != len(cols_name) - 1:
                    value = str(cell_obj.value)
                else:
                    try:
                        value = links[row_idx + 1][11:-9]
                    except KeyError:
                        # no link recorded for this row: leave the cell out
                        continue

                xf = book.xf_list[sheet.cell_xf_index(row_idx, col_idx)]
                bgx = xf.background.pattern_colour_index
                color = colour_by_index.get(bgx, Color.WHITE)

                row.update_cell(col_idx, value, color)
            tbl.append(row)
    return tbl
示例#12
0
文件: xls.py 项目: ecosoft-odoo/cft
 def _get_cell_value(self, cell, field_type=False):
     """Return the cell's value, converted for import when the type is known.

     * 'date' / 'datetime': a cell typed "number" is decoded as an Excel date
       (date mode 0) and formatted as ``YYYY-MM-DD`` or
       ``YYYY-MM-DD HH:MM:SS``; any other cell is passed through.
     * 'integer' / 'float': the value is stringified, stripped and has its
       commas removed; a plain unsigned number becomes an int or float,
       anything else stays a string.
     * 'many2one': numeric values are turned into strings.
     * any other type: the raw value.

     String results are UTF-8 encoded and lose a trailing '.0'. Except for
     'boolean' fields, any falsy result is returned as ''.
     """
     datemode = 0  # From book.datemode, but we fix it for simplicity
     raw = cell.value

     if field_type in ('date', 'datetime'):
         if ctype_text.get(cell.ctype, 'unknown type') == 'number':
             stamp = datetime(*xlrd.xldate_as_tuple(raw, datemode))
             if field_type == 'date':
                 value = stamp.strftime("%Y-%m-%d")
             else:
                 value = stamp.strftime("%Y-%m-%d %H:%M:%S")
         else:
             value = raw
     elif field_type in ('integer', 'float'):
         digits = str(raw).strip().replace(',', '')
         if not digits:
             value = ''
         elif digits.replace('.', '', 1).isdigit():  # Is number
             value = float(digits)
             if field_type == 'integer':
                 value = int(value)
         else:  # Is string, no conversion
             value = digits
     elif field_type in ('many2one',):
         # If number, change to string
         is_numeric = isinstance(raw, (int, long, float, complex))
         value = str(raw) if is_numeric else raw
     else:
         value = raw

     # If string, cleanup
     if isinstance(value, str):
         value = value.encode('utf-8')
         if value[-2:] == '.0':
             value = value[:-2]

     # Except boolean, when no value, we should return as ''
     if field_type not in ('boolean',) and not value:
         value = ''
     return value
示例#13
0
 def _get_cell_value(self, cell, field_type=False):
     """Return the cell's value, converted for import when the type is known.

     * 'date' / 'datetime': a cell typed "number" is decoded as an Excel date
       (date mode 0) and formatted as ``YYYY-MM-DD`` or
       ``YYYY-MM-DD HH:MM:SS``; any other cell is passed through.
     * 'integer' / 'float': the value is stringified, stripped and has its
       commas removed; a plain unsigned number becomes an int or float,
       anything else stays a string.
     * 'many2one': numeric values are turned into strings.
     * any other type: the raw value.

     String results are UTF-8 encoded and lose a trailing '.0'. Except for
     'boolean' fields, any falsy result is returned as ''.
     """
     datemode = 0  # From book.datemode, but we fix it for simplicity
     raw = cell.value

     if field_type in ('date', 'datetime'):
         if ctype_text.get(cell.ctype, 'unknown type') == 'number':
             stamp = datetime(*xlrd.xldate_as_tuple(raw, datemode))
             if field_type == 'date':
                 value = stamp.strftime("%Y-%m-%d")
             else:
                 value = stamp.strftime("%Y-%m-%d %H:%M:%S")
         else:
             value = raw
     elif field_type in ('integer', 'float'):
         digits = str(raw).strip().replace(',', '')
         if not digits:
             value = ''
         elif digits.replace('.', '', 1).isdigit():  # Is number
             value = float(digits)
             if field_type == 'integer':
                 value = int(value)
         else:  # Is string, no conversion
             value = digits
     elif field_type in ('many2one',):
         # If number, change to string
         is_numeric = isinstance(raw, (int, long, float, complex))
         value = str(raw) if is_numeric else raw
     else:
         value = raw

     # If string, cleanup
     if isinstance(value, str):
         value = value.encode('utf-8')
         if value[-2:] == '.0':
             value = value[:-2]

     # Except boolean, when no value, we should return as ''
     if field_type not in ('boolean',) and not value:
         value = ''
     return value
示例#14
0
def parse_mds_leader_organogram(filename):
	"""Scan the first sheet of ``filename`` and register every district row.

	Rows from index 8 to the end of the sheet are read. In each row the cells
	at indexes 0-4 are taken as region, ga, area, chapter and district, and
	the cell at index 6 as the structure. When the structure equals
	'district' (case-insensitively) the row is counted and the five
	``try_create_*`` helpers are called for it, from region down to district.

	The total is printed and returned.

	NOTE(review): ``.strip()`` is called on each of these cell values, so the
	cells are assumed to hold text -- confirm against the input files.
	"""
	#open worksheet
	xl_wb = xl_workbook = xlrd.open_workbook(filename)
	sheet_names = xl_workbook.sheet_names()
	xl_sheet = xl_workbook.sheet_by_index(0)

	# Wildcard import at function scope (Python 2 only); presumably the
	# source of the try_create_* helpers used below -- TODO confirm.
	from dialogues.models import *
	row = xl_sheet.row(0)  

	district_count = 0

	# Data rows start at index 8; everything above is skipped.
	for i in range(8,xl_sheet.nrows):
		row = xl_sheet.row(i)
		valid_dist = False

		for idx, cell_obj in enumerate(row):

			cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
			if idx == 0:
				region = cell_obj.value.strip()
			if idx == 1:
				ga = cell_obj.value.strip()
			if idx == 2:
				area = cell_obj.value.strip()
			if idx == 3:
				chapter = cell_obj.value.strip()
			if idx == 4:
				district = cell_obj.value.strip()
			if idx == 6:
				structure = cell_obj.value.strip()
				# Only rows marked as a district are counted and created.
				if structure.lower() == 'district':
					district_count += 1
					valid_dist = True
					#print "%s, %s, %s, %s, %s"%(region, ga, area, chapter, district)

		# Each helper receives its own name plus the name one level up.
		if valid_dist:
			try_create_region(region)
			try_create_ga(ga, region)
			try_create_area(area, ga)
			try_create_chapter(chapter, area)
			try_create_district(district, chapter)

	print "Total number of districts added: %d"%district_count
	return district_count
示例#15
0
def open_file(path):
    """
    Open an Excel workbook and dump its first sheet to stdout.

    Prints the sheet names, the first sheet's name, the type and value of
    every cell in its first row, and finally every cell of the sheet grouped
    by row.
    """
    workbook = xlrd.open_workbook(path)

    # Sheets can be fetched by name ...
    names = workbook.sheet_names()
    print('Sheet Names', names)
    first_sheet = workbook.sheet_by_name(names[0])

    # ... or by zero-based index; the index lookup is the one that is used.
    first_sheet = workbook.sheet_by_index(0)
    print('Sheet name: %s' % first_sheet.name)

    # Rows and columns are zero-indexed as well.
    header_cells = first_sheet.row(0)

    from xlrd.sheet import ctype_text

    # Types and values of the first row.
    print('(Column #) type:value')
    for position, header_cell in enumerate(header_cells):
        type_name = ctype_text.get(header_cell.ctype, 'unknown type')
        print('(%s) %s %s' % (position, type_name, header_cell.value))

    # Every cell of the sheet, row by row.
    separator = '-' * 40
    width = first_sheet.ncols
    for row_idx in range(first_sheet.nrows):
        print(separator)
        print('Row: %s' % row_idx)
        for col_idx in range(width):
            current = first_sheet.cell(row_idx, col_idx)
            print('Column: [%s] cell_obj: [%s]' % (col_idx, current))
示例#16
0
def parse_mds_leader_organogram(filename):
    """Scan the first sheet of ``filename`` and register every district row.

    Rows from index 8 to the end of the sheet are read. In each row the cells
    at indexes 0-4 are taken as region, ga, area, chapter and district, and
    the cell at index 6 as the structure. When the structure equals
    'district' (case-insensitively) the row is counted and the five
    ``try_create_*`` helpers are called for it, from region down to district.

    The total is printed and returned.

    NOTE(review): ``.strip()`` is called on each of these cell values, so the
    cells are assumed to hold text -- confirm against the input files.
    """
    #open worksheet
    xl_wb = xl_workbook = xlrd.open_workbook(filename)
    sheet_names = xl_workbook.sheet_names()
    xl_sheet = xl_workbook.sheet_by_index(0)

    # Wildcard import at function scope (Python 2 only); presumably the
    # source of the try_create_* helpers used below -- TODO confirm.
    from dialogues.models import *
    row = xl_sheet.row(0)

    district_count = 0

    # Data rows start at index 8; everything above is skipped.
    for i in range(8, xl_sheet.nrows):
        row = xl_sheet.row(i)
        valid_dist = False

        for idx, cell_obj in enumerate(row):

            cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
            if idx == 0:
                region = cell_obj.value.strip()
            if idx == 1:
                ga = cell_obj.value.strip()
            if idx == 2:
                area = cell_obj.value.strip()
            if idx == 3:
                chapter = cell_obj.value.strip()
            if idx == 4:
                district = cell_obj.value.strip()
            if idx == 6:
                structure = cell_obj.value.strip()
                # Only rows marked as a district are counted and created.
                if structure.lower() == 'district':
                    district_count += 1
                    valid_dist = True
                    #print "%s, %s, %s, %s, %s"%(region, ga, area, chapter, district)

        # Each helper receives its own name plus the name one level up.
        if valid_dist:
            try_create_region(region)
            try_create_ga(ga, region)
            try_create_area(area, ga)
            try_create_chapter(chapter, area)
            try_create_district(district, chapter)

    print "Total number of districts added: %d" % district_count
    return district_count
示例#17
0
def get_column_stats(xl_sheet, col_idx):
    """
    Print every value of one column followed by a frequency summary.

    :param xl_sheet:  Sheet object from Excel Workbook, extracted using xlrd
    :param col_idx: zero-indexed column number, given either as an int or as
        a string of digits (e.g. raw user input)
    :return: None; all results are printed. An invalid sheet or column only
        prints a message.

    Falsy cell values (empty string, 0, ...) are counted as missing.
    """
    if xl_sheet is None:
        print('xl_sheet is None')
        return

    range_hint = ('Please enter a valid column number (0-%d)' %
                  (xl_sheet.ncols - 1))

    # Ints are accepted as documented; anything else must look like a digit
    # string (calling .isdigit() on an int raised AttributeError before).
    if not isinstance(col_idx, int):
        if not hasattr(col_idx, 'isdigit') or not col_idx.isdigit():
            print(range_hint)
            return
        col_idx = int(col_idx)

    if col_idx < 0 or col_idx >= xl_sheet.ncols:
        print(range_hint)
        return

    # Iterate through rows, and print out the column values
    row_vals = []
    for row_idx in range(0, xl_sheet.nrows):
        cell_obj = xl_sheet.cell(row_idx, col_idx)
        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
        print('(row %s) %s (type:%s)' %
              (row_idx, cell_obj.value, cell_type_str))
        row_vals.append(cell_obj.value)

    # Retrieve non-empty rows
    nonempty_row_vals = [x for x in row_vals if x]
    num_rows_missing_vals = xl_sheet.nrows - len(nonempty_row_vals)
    print('Vals: %d; Rows Missing Vals: %d' %
          (len(nonempty_row_vals), num_rows_missing_vals))

    # Count occurrences of values
    counts = Counter(nonempty_row_vals)

    # Display value counts; the banner uses real newlines (a literal 'n' was
    # printed before).
    print('-' * 40 + '\n', 'Top Twenty Values', '\n' + '-' * 40)
    print('Value [count]')
    for val, cnt in counts.most_common(20):
        print('%s [%s]' % (val, cnt))
示例#18
0
def open_file(path):
    """
    Open an Excel workbook and dump its first sheet to stdout.

    Prints the sheet names, the first sheet's name, the type and value of
    every cell in its first row, and finally every cell of the sheet grouped
    by row.
    """
    workbook = xlrd.open_workbook(path)

    # Sheets can be fetched by name ...
    names = workbook.sheet_names()
    print('Sheet Names', names)
    first_sheet = workbook.sheet_by_name(names[0])

    # ... or by zero-based index; the index lookup is the one that is used.
    first_sheet = workbook.sheet_by_index(0)
    print('Sheet name: %s' % first_sheet.name)

    # Rows and columns are zero-indexed as well.
    header_cells = first_sheet.row(0)

    from xlrd.sheet import ctype_text

    # Types and values of the first row.
    print('(Column #) type:value')
    for position, header_cell in enumerate(header_cells):
        type_name = ctype_text.get(header_cell.ctype, 'unknown type')
        print('(%s) %s %s' % (position, type_name, header_cell.value))

    # Every cell of the sheet, row by row.
    separator = '-' * 40
    width = first_sheet.ncols
    for row_idx in range(first_sheet.nrows):
        print(separator)
        print('Row: %s' % row_idx)
        for col_idx in range(width):
            current = first_sheet.cell(row_idx, col_idx)
            print('Column: [%s] cell_obj: [%s]' % (col_idx, current))
示例#19
0
def extract_column_headings(xl_workbook, table, spreadsheet_type):
    """Return ``(column_headings, column_subheadings)`` for ``table``.

    * "Time series" spreadsheets: a DataFrame with one column per entry of
      ``table.column_titles`` and one row per table column (positions are
      offsets from the smallest column). Cells typed "xldate" are rendered
      as ``dd/mm/YYYY``; the subheadings are an empty dict.
    * "long format" tables: the values found in the row above the first
      data row, for the data columns followed by the row-description
      columns; the subheadings are an empty dict.
    * anything else (wide format): the results of ``merged_data_function``
      and ``merged_data_subheadings_function``.
    """
    sheet = xl_workbook.sheet_by_name(table.sheet_name)

    if spreadsheet_type == "Time series":
        headings = pd.DataFrame()
        for r, title in table.column_titles.items():
            for c in table.cols:
                cell = sheet.row(r)[c]
                position = c - min(table.cols)
                is_date = ctype_text.get(cell.ctype, 'unknown type') == "xldate"
                if is_date:
                    # serial number -> seconds, using 25569 as the day offset
                    stamp = pd.to_datetime((cell.value - 25569) * 86400.0,
                                           unit='s')
                    entry = stamp.strftime('%d/%m/%Y')
                else:
                    entry = cell.value
                headings.loc[position, title] = entry
        return headings, {}

    first_row = min(table.rows)
    if table.table_type == "long format":
        wanted = list(table.cols) + table.row_descriptions
        return [sheet.row(first_row-1)[c].value for c in wanted], {}

    # Wide format: headings and subheadings come from the merged-cell helpers.
    column_headings = merged_data_function(
        xl_workbook, sheet_name=table.sheet_name,
        merged_data_cols=table.merged_meta_data, data_cols=table.cols,
        data_rows=table.rows,
        extra_rows=table.extra_meta_data, spreadsheet_type=spreadsheet_type,
        column_header_locations=table.column_header_locations,
        last_row_in_sheet=table.last_row_in_sheet)

    column_subheadings = merged_data_subheadings_function(
        xl_workbook, sheet_name=table.sheet_name,
        merged_data_cols=table.merged_meta_data,
        data_cols=table.cols, data_rows=table.rows,
        rows=table.column_header_locations,
        extra_rows=table.extra_meta_data, top_row=table.top_row)

    return column_headings, column_subheadings
示例#20
0
def read_xls(fname, display):
    """Return the first sheet of ``fname`` as a list of rows of cell values.

    When ``display`` is truthy the sheet name, one line per column (the
    first-row value with the type of the second-row cell) and every cell
    value are printed as well.
    """
    workbook = xlrd.open_workbook(fname)

    # Sheets are zero-indexed; only the first one is read.
    sheet = workbook.sheet_by_index(0)

    if display:
        print('Sheet name: %s' % sheet.name)

        titles = sheet.row(0)  # 1st row
        samples = sheet.row(1)  # 2nd row

        from xlrd.sheet import ctype_text

        print('(Column #) type:value')
        for idx, sample in enumerate(samples):
            type_name = ctype_text.get(sample.ctype, 'unknown type')
            print('(%s) %s \t: %s' % (idx, titles[idx].value, type_name))

    # Collect all values, row by row.
    tb = []
    for row_idx in range(sheet.nrows):
        if display:
            print('-' * 40)
            print('Row: %s' % row_idx)
        values = []
        for col_idx in range(sheet.ncols):
            current = sheet.cell(row_idx, col_idx)
            values.append(current.value)
            if display:
                print('Column: [%s] cell_obj: [%s]' %
                      (col_idx, current.value))
        tb.append(values)

    return tb
示例#21
0
    def __init__(self):
        """Start Firefox on the HEB site and load the workbook's first rows.

        A maximised Firefox window with cleared cookies is pointed at
        https://www.heb.com.mx/. The workbook ``untitled/heblista.xlsx``,
        located relative to the parent of this file's directory, is opened
        and its first sheet is read: every cell value is printed, the values
        of the first row are collected in ``self.arreglo2`` and those of the
        second row in ``self.arreglo3`` (``self.arreglo`` is the pair of
        both lists).

        Loop variables are stored on the instance, so ``self.idx``,
        ``self.cell_type_str``, ``self.row_idx``, ``self.col_idx`` and
        ``self.cell_obj`` keep the values of the last iteration.
        """
        self.driver = webdriver.Firefox()
        self.driver.maximize_window()
        self.driver.delete_all_cookies()
        self.driver.get("https://www.heb.com.mx/")

        self.fname = join(dirname(dirname(abspath(__file__))), 'untitled',
                          'heblista.xlsx')
        self.xl_workbook = xlrd.open_workbook(self.fname)

        # List sheet names, and pull a sheet by name
        #
        self.sheet_names = self.xl_workbook.sheet_names()
        self.xl_sheet = self.xl_workbook.sheet_by_name(self.sheet_names[0])

        self.row = self.xl_sheet.row(0)  # 1st row

        self.arreglo2 = []  # values of the first row
        self.arreglo3 = []  # values of the second row
        self.arreglo = self.arreglo2, self.arreglo3
        self.contador3 = 0  # 1-based number of the row being read

        # Walk the first row; only the type name of its last cell survives
        # in self.cell_type_str.
        for self.idx, self.cell_obj in enumerate(self.row):
            self.cell_type_str = ctype_text.get(self.cell_obj.ctype,
                                                'unknown type')

        # Print all values, iterating through rows and columns
        #
        self.num_cols = self.xl_sheet.ncols  # Number of columns
        for self.row_idx in range(0,
                                  self.xl_sheet.nrows):  # Iterate through rows
            self.contador3 += 1
            for self.col_idx in range(0, self.num_cols):
                # Iterate through columns
                # From here on self.cell_obj holds the cell's value, not the
                # cell object.
                self.cell_obj = self.xl_sheet.cell(self.row_idx,
                                                   self.col_idx).value
                print(self.cell_obj)
                if self.contador3 == 1:
                    self.arreglo2.append(self.cell_obj)
                if self.contador3 == 2:
                    self.arreglo3.append(self.cell_obj)
示例#22
0
def checkveolia():
    """Download the Veolia Eau export and push its last value to device 1.

    The function logs in with the ``Username`` / ``Password`` plugin
    parameters, visits the two consumption pages, downloads the XLS export
    and logs out. The export is written to ``temp.xls`` in the current
    directory and re-opened with xlrd (cp1252). The value in column index 1
    of the last row of the first sheet is passed to ``UpdateDevice(1, 0, ...)``;
    nothing is updated when that row has fewer than two cells.
    """
    url = URL()
    urlConnect = 'https://www.service-client.veoliaeau.fr/home.loginAction.do#inside-space'
    urlConso1 = 'https://www.service-client.veoliaeau.fr/home/espace-client/votre-consommation.html'
    urlConso2 = 'https://www.service-client.veoliaeau.fr/home/espace-client/votre-consommation.html?vueConso=historique'
    urlXls = 'https://www.service-client.veoliaeau.fr/home/espace-client/votre-consommation.exportConsommationData.do?vueConso=historique'
    urlDisconnect = 'https://www.service-client.veoliaeau.fr/logout'

    # Connect to Veolia website
    Domoticz.Log('Connection au site Veolia Eau')
    params = {
        'veolia_username': Parameters["Username"],
        'veolia_password': Parameters["Password"],
        'login': '******'
    }
    referer = 'https://www.service-client.veoliaeau.fr/home.html'
    url.call(urlConnect, params, referer)

    # The two consumption pages are requested before the export itself.
    url.call(urlConso1)
    url.call(urlConso2)

    # Download XLS file
    Domoticz.Log('Telechargement du fichier')
    response = url.call(urlXls)
    content = response.read()

    # logout
    Domoticz.Log('Deconnection du site Veolia Eau')
    url.call(urlDisconnect)

    # The context manager closes the file even when the write fails.
    with open('temp.xls', 'wb') as xls_file:
        xls_file.write(content)

    book = xlrd.open_workbook('temp.xls', encoding_override="cp1252")
    sheet = book.sheet_by_index(0)
    row = sheet.row(sheet.nrows - 1)
    # Column index 1 of the last row carries the value reported to Domoticz.
    if len(row) > 1:
        UpdateDevice(1, 0, row[1].value)
示例#23
0
def read_xls(fname, display):
    """Return the first sheet of ``fname`` as a list of rows of cell values.

    When ``display`` is truthy the sheet name, one line per column (the
    first-row value with the type of the second-row cell) and every cell
    value are printed as well.
    """
    workbook = xlrd.open_workbook(fname)

    # Sheets are zero-indexed; only the first one is read.
    sheet = workbook.sheet_by_index(0)

    if display:
        print('Sheet name: %s' % sheet.name)

        titles = sheet.row(0)  # 1st row
        samples = sheet.row(1)  # 2nd row

        from xlrd.sheet import ctype_text

        print('(Column #) type:value')
        for idx, sample in enumerate(samples):
            type_name = ctype_text.get(sample.ctype, 'unknown type')
            print('(%s) %s \t: %s' % (idx, titles[idx].value, type_name))

    # Collect all values, row by row.
    tb = []
    for row_idx in range(sheet.nrows):
        if display:
            print('-' * 40)
            print('Row: %s' % row_idx)
        values = []
        for col_idx in range(sheet.ncols):
            current = sheet.cell(row_idx, col_idx)
            values.append(current.value)
            if display:
                print('Column: [%s] cell_obj: [%s]' %
                      (col_idx, current.value))
        tb.append(values)

    return tb
示例#24
0
def get_column_stats(xl_sheet, col_idx):
    """Print every value of one column followed by its most common values.

    :param xl_sheet:  Sheet object from Excel Workbook, extracted using xlrd
    :param col_idx: zero-indexed column number, either as an int or as a
        string of digits (e.g. typed in by a user)
    :return: None; all results are printed
    """
    if xl_sheet is None:
        print ('xl_sheet is None')
        return

    # Ints are accepted as-is; anything else must look like a non-negative
    # integer before it is converted.
    if not isinstance(col_idx, int):
        if not str(col_idx).isdigit():
            print ('Please enter a valid column number (0-%d)' % (xl_sheet.ncols-1))
            return
        col_idx = int(col_idx)

    if col_idx < 0 or col_idx >= xl_sheet.ncols:
        print ('Please enter a valid column number (0-%d)' % (xl_sheet.ncols-1))
        return

    # Iterate through rows, and print out the column values
    row_vals = []
    for row_idx in range(xl_sheet.nrows):
        cell_obj = xl_sheet.cell(row_idx, col_idx)
        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
        print ('(row %s) %s (type:%s)' % (row_idx, cell_obj.value, cell_type_str))
        row_vals.append(cell_obj.value)

    # Keep truthy values only; note that this also drops 0 and 0.0.
    nonempty_row_vals = [x for x in row_vals if x]
    num_rows_missing_vals = xl_sheet.nrows - len(nonempty_row_vals)
    print ('Vals: %d; Rows Missing Vals: %d' % (len(nonempty_row_vals), num_rows_missing_vals))

    # Count occurrences of values
    counts = Counter(nonempty_row_vals)

    # Display value counts
    print ('-'*40 + '\n', 'Top Twenty Values', '\n' + '-'*40 )
    print ('Value [count]')
    for val, cnt in counts.most_common(20):
        print ('%s [%s]' % (val, cnt))
示例#25
0
def excelConverter(path, tname):
    """Dump the first sheet of the workbook named *tname* into a text file.

    Walks *path* for the first file called *tname*, opens it with xlrd and
    appends the first row (with cell types) and then every cell of the first
    sheet to ``<numFiles>combinedExfil.txt``.  While the first row is written
    the module-level ``numBytes`` counter is updated; once it would reach
    45000 the output rolls over to the next numbered file.

    :param path: directory tree to search
    :param tname: exact file name of the workbook to convert
    """
    global numBytes, numExfilFiles, numFiles, dT, eD, gpath, flg

    # Falls back to the literal name "temp" when no matching file is found.
    excelFileInf = "temp"
    for root, dirs, files in os.walk(path):
        if tname in files:
            excelFileInf = os.path.abspath(os.path.join(root, tname))
            print (excelFileInf)
            break

    fileFil = open(str(numFiles)+"combinedExfil.txt", "a")
    try:
        SheetList = xlrd.open_workbook(excelFileInf)
        fileFil.write("[Excel-Spreadsheet]")
        SpreadSheet = SheetList.sheet_by_index(0)
        fileFil.write('Sheet name:'+str(SpreadSheet.name))

        for idx, spreadsheetCells in enumerate(SpreadSheet.row(0)):
            ContentType = ctext.get(spreadsheetCells.ctype, 'Null')
            writtenText = str(spreadsheetCells.value)
            textSize = sys.getsizeof(writtenText)
            if numBytes + textSize < 45000:
                numBytes += textSize
            else:
                # Size budget exhausted: close the current file and continue
                # in the next numbered one.
                numFiles += 1
                fileFil.close()
                fileFil = open(str(numFiles)+"combinedExfil.txt", "a")
                numBytes = textSize
            fileFil.write(("("+str(idx)+")"+" "+str(ContentType)+writtenText))

        num_cols = SpreadSheet.ncols   # Number of columns
        for row_idx in range(0, SpreadSheet.nrows):
            for col_idx in range(0, num_cols):
                spreadsheetCells = SpreadSheet.cell(row_idx, col_idx)  # Get cell object by row, col
                fileFil.write('Row-Column[%s:%s] Content[%s]' % (row_idx, col_idx, spreadsheetCells))
    finally:
        fileFil.close()
# Script fragment: scans every row of ``first_sheet`` (defined outside this
# fragment) and dumps the rows that follow the header and look like
# transaction records.
num_rows = first_sheet.nrows
num_cols = first_sheet.ncols

print("No of Rows : " + str(num_rows))
print("No of Columns : " + str(num_cols))

# 'Y'/'N' flag; nothing in this visible part of the script sets it to 'Y'.
header_found = 'N'
# Number of rows printed as transaction records so far.
txn_count = 0

# Read through all Rows/Columns/Cells
print(100 * '=')
for row_num in range(0, num_rows):
    # Get First Cell details for each row
    current_row = first_sheet.row(row_num)
    first_cell = first_sheet.cell(row_num, 0)
    first_cell_type = ctype_text.get(first_cell.ctype, 'Unknown Type')
    first_cell_value = first_cell.value

    # Once header is found extract the txn records
    if header_found == 'Y':
        # A row counts as a transaction when its first cell is not text
        # and is not a lone tab character.
        if first_cell_type != 'text' and \
           first_cell_value != '\t':
            print('Row number ' + str(row_num) + ' : ' + str(current_row))
            txn_count = txn_count + 1
            # Dump type and value of every cell in the transaction row.
            for col_num in range(0, num_cols):
                cell = first_sheet.cell(row_num, col_num)
                cell_type = ctype_text.get(cell.ctype, 'Unknown Type')
                cell_value = cell.value
                print('cell(%d,%d) = (type = %s, value = %s)' %
                      (row_num, col_num, cell_type, cell_value))
        if first_cell_type == 'text' and \
示例#27
0
def getCellDetails(row_num, col_num):
    """Look up one cell of the module-level ``first_sheet``.

    :param row_num: zero-based row index
    :param col_num: zero-based column index
    :return: tuple ``(cell, type name, value)``; the type name falls back to
        'Unknown Type' when the cell's ctype is not in ``ctype_text``
    """
    target = first_sheet.cell(row_num, col_num)
    type_name = ctype_text.get(target.ctype, 'Unknown Type')
    return (target, type_name, target.value)
def show_column_names(xl_sheet):
    """Print index, value and type name of every cell in the first row.

    :param xl_sheet: xlrd sheet object (anything with a ``row(0)`` method
        returning cells that expose ``ctype`` and ``value``)
    """
    row = xl_sheet.row(0)  # 1st row
    # Banner: rule, heading, rule -- each on its own line.
    print(60 * "-" + "\n(Column #) value [type]\n" + 60 * "-")
    for idx, cell_obj in enumerate(row):
        cell_type_str = ctype_text.get(cell_obj.ctype, "unknown type")
        print("(%s) %s [%s]" % (idx, cell_obj.value, cell_type_str))
示例#29
0
 def import_xls(self, model, file, column_name=None, column_value=None):
     """Convert an uploaded XLS file to CSV and feed it to ``base_import``.

     The base64 payload is written to a temporary ``.xls`` file, its first
     sheet is re-written as a fully quoted UTF-8 CSV, and that CSV text is
     imported into *model* using the first sheet row as field names.

     :param model: name of the model to import into (``res_model``)
     :param file: base64-encoded content of the XLS file
     :param column_name: optional extra column to prepend to the header
     :param column_value: value for that extra column; both must be set for
         the column to be added
     :return: the unchanged *file* argument
     :raises ValidationError: when the converted CSV is empty or the import
         reports errors
     """
     decoded_data = base64.decodestring(file)
     ftemp = 'temp' + datetime.utcnow().strftime('%H%M%S%f')[:-3]
     xls_path = ftemp + '.xls'
     csv_path = ftemp + '.csv'
     try:
         with open(xls_path, 'wb') as xls_file:
             xls_file.write(decoded_data)
         wb = xlrd.open_workbook(xls_path)
         st = wb.sheet_by_index(0)
         if st._cell_values:
             _HEADER_FIELDS = st._cell_values[0]
         with open(csv_path, 'wb') as csv_file:
             csv_out = unicodecsv.writer(csv_file,
                                         encoding='utf-8',
                                         quoting=unicodecsv.QUOTE_ALL)
             for nrow in xrange(st.nrows):
                 row_values = st.row_values(nrow)
                 # Row 0 is the header and is written untouched; only data
                 # rows get their number/date cells converted.
                 if nrow > 0:
                     for index, val in enumerate(row_values):
                         cell_type = ctype_text.get(
                             st.cell(nrow, index).ctype, 'unknown type')
                         if cell_type == 'number':
                             if not val:
                                 row_values[index] = 0
                             elif not str(val).isdigit():
                                 # NOTE(review): this truncates fractional
                                 # numbers to int -- confirm that is intended.
                                 row_values[index] = int(val)
                         elif cell_type == 'xldate':
                             row_values[index] = self.xldate_to_datetime(val)
                         # Every other cell type keeps its raw value.
                 csv_out.writerow(row_values)
         with open(csv_path, 'r') as csv_file:
             file_txt = csv_file.read()
     finally:
         # Remove the temp files even when parsing or conversion failed.
         for tmp_path in (xls_path, csv_path):
             if os.path.exists(tmp_path):
                 os.unlink(tmp_path)
     if not file_txt:
         raise ValidationError(_(str("File Not found.")))
     if column_name and column_value:
         _HEADER_FIELDS.insert(0, str(column_name))
         file_txt = self._add_column(column_name, column_value, file_txt)
     Import = self.env['base_import.import']
     imp = Import.create({
         'res_model': model,
         'file': file_txt,
     })
     [errors] = imp.do(
         _HEADER_FIELDS,
         {'headers': True, 'separator': ',',
          'quoting': '"', 'encoding': 'utf-8'})
     if errors:
         raise ValidationError(_(str(errors[0]['message'])))
     return file
示例#30
0
# Script fragment: reads intensity/energy pairs from the fifth sheet of
# "excelfile.xlsx" and passes them to ``creationism`` (defined elsewhere).
xl_workbook = xlrd.open_workbook("excelfile.xlsx")
print(xl_workbook)
sheet_names = xl_workbook.sheet_names()
print('Sheet Names', sheet_names)

# Sheets are zero-indexed, so index 4 is the fifth sheet.
xl_sheet = xl_workbook.sheet_by_index(4)
print('Sheet name: %s' % xl_sheet.name)

from xlrd.sheet import ctype_text


inten = []   # column 6 values scaled by 0.3, one per accepted row
energy = []  # column 3 values of the same rows
# NOTE(review): this name shadows the builtin ``sum`` for the rest of the module.
sum = 0
# Row 0 is skipped; rows whose column 3 is empty or holds "-" are ignored.
for row_index in range(1, xl_sheet.nrows):
    if (ctype_text.get(xl_sheet.cell(row_index, 3).ctype) == "empty" or xl_sheet.cell(row_index, 3).value == "-"):
        continue
    inten.append(float(xl_sheet.cell(row_index, 6).value) * 0.3)
    energy.append(float(xl_sheet.cell(row_index, 3).value))
    sum+=float(xl_sheet.cell(row_index, 6).value) * 0.3
# Total of the scaled column 6 values, truncated to an int for display.
print(int(sum))

creationism(energy, inten)

# Four empty lists; they are filled by code beyond this fragment.
IntAndE = []
for i in range(0,4):
    IntAndE.append([])


for i in range(0, len(inten)):
示例#31
0
def read_curriculum():
    """Read the course entries from the bundled curriculum workbook.

    Opens ``crs/EE-curriculum-October2014 (11).xls`` (located two directory
    levels above this file), takes its first sheet and collects the values
    of columns 0 and 7 for four fixed row ranges, followed by the single
    cell at row 15, column 7.

    :return: flat list of cell values, alternating column 0 and column 7
        for each row, with the value of cell (15, 7) appended last
    """
    cname = join(dirname(dirname(abspath(__file__))), 'crs', 'EE-curriculum-October2014 (11).xls')
    cl_workbook = xlrd.open_workbook(cname)

    sheet_names = cl_workbook.sheet_names()
    cl_sheet = cl_workbook.sheet_by_name(sheet_names[0])

    # Zero-based row ranges of the four course tables on the sheet.
    course_row_ranges = (
        range(7, 14),
        range(19, 25),
        range(30, 36),
        range(41, 46),
    )

    list_of_courses = []
    for row_range in course_row_ranges:
        for row_idx in row_range:
            list_of_courses.append(cl_sheet.cell(row_idx, 0).value)
            list_of_courses.append(cl_sheet.cell(row_idx, 7).value)

    # One extra entry that sits outside the four tables.
    list_of_courses.append(cl_sheet.cell(15, 7).value)

    return list_of_courses
示例#32
0
    def handle(self, **options):
        """Import the values of a risk data workbook into the OGC datastore.

        For every scenario (axis 'x') the sheet named after the scenario is
        read, and for every round period (axis 'y') the matching values are
        inserted through ``self.insert_db``.  Each (scenario, round period)
        pair uses its own connection: it is committed on success and rolled
        back (with the traceback printed) on any error.

        Options read: ``commit``, ``region``, ``excel_file``,
        ``risk_analysis``, ``excel_metadata_file``, ``risk_app``.

        :return: the ``risk_analysis`` option value
        :raises CommandError: when ``region``, ``risk_analysis`` or
            ``excel_file`` is missing
        """
        commit = options.get('commit')
        region = options.get('region')
        excel_file = options.get('excel_file')
        risk_analysis = options.get('risk_analysis')
        excel_metadata_file = options.get('excel_metadata_file')
        risk_app = options.get('risk_app')
        app = RiskApp.objects.get(name=risk_app)

        if region is None:
            raise CommandError(
                "Input Destination Region '--region' is mandatory")

        if risk_analysis is None:
            raise CommandError(
                "Input Risk Analysis associated to the File "
                "'--risk_analysis' is mandatory")

        if not excel_file:
            raise CommandError(
                "Input Risk Data Table '--excel_file' is mandatory")

        risk = RiskAnalysis.objects.get(name=risk_analysis, app=app)

        wb = xlrd.open_workbook(filename=excel_file)
        # From here on ``region`` is the Region object, not its name.
        region = Region.objects.get(name=region)
        # Not used below; kept because the lookup fails early when the
        # region has no top-level administrative division.
        region_code = region.administrative_divisions.filter(
            parent=None)[0].code

        scenarios = RiskAnalysisDymensionInfoAssociation.objects.filter(
            riskanalysis=risk, axis='x')
        round_periods = RiskAnalysisDymensionInfoAssociation.objects.filter(
            riskanalysis=risk, axis='y')

        # Layer type names may carry a "workspace:" prefix; keep the bare name.
        table_name = risk.layer.typename.split(":")[1] \
            if ":" in risk.layer.typename else risk.layer.typename

        for scenario in scenarios:
            # Connection settings of the datastore the values are written to
            datastore = settings.OGC_SERVER['default']['DATASTORE']
            if (datastore):
                ogc_db_name = settings.DATABASES[datastore]['NAME']
                ogc_db_user = settings.DATABASES[datastore]['USER']
                ogc_db_passwd = settings.DATABASES[datastore]['PASSWORD']
                ogc_db_host = settings.DATABASES[datastore]['HOST']
                ogc_db_port = settings.DATABASES[datastore]['PORT']

            sheet = wb.sheet_by_name(scenario.value)
            row_headers = sheet.row(0)
            for rp_idx, rp in enumerate(round_periods):
                # Find the sheet column holding this round period's values.
                col_num = -1
                if app.name == RiskApp.APP_DATA_EXTRACTION:
                    for idx, cell_obj in enumerate(row_headers):
                        try:
                            if self.to_int_if_number(
                                    cell_obj.value) == self.to_int_if_number(
                                        rp.value):
                                col_num = idx
                                break
                        except Exception:
                            # A header that cannot be compared is skipped.
                            traceback.print_exc()
                elif app.name == RiskApp.APP_COST_BENEFIT:
                    col_num = 0

                if col_num < 0:
                    continue

                conn = self.get_db_conn(ogc_db_name, ogc_db_user,
                                        ogc_db_port, ogc_db_host,
                                        ogc_db_passwd)
                try:
                    if app.name == RiskApp.APP_DATA_EXTRACTION:
                        for row_num in range(1, sheet.nrows):
                            cell_obj = sheet.cell(row_num, 5)
                            iso_country = str(
                                sheet.cell(row_num, 2).value)[:2]
                            cell_type_str = ctype_text.get(
                                cell_obj.ctype, 'unknown type')
                            if not cell_obj.value:
                                continue
                            # Text codes are used verbatim; numeric ones are
                            # zero-padded to five digits after the first two
                            # characters of column 2.
                            adm_code = cell_obj.value \
                                if cell_type_str == 'text' \
                                else iso_country + '{:05d}'.format(int(cell_obj.value))
                            print('adm code read from cell: {}'.format(
                                adm_code))
                            try:
                                adm_div = AdministrativeDivision.objects.get(
                                    code=adm_code)
                            except ObjectDoesNotExist:
                                # Unknown division: report it and skip the
                                # row instead of reusing the division of a
                                # previous row.
                                traceback.print_exc()
                                continue
                            value = sheet.cell_value(row_num, col_num)
                            print('[%s] (%s) %s (%s) / %s' %
                                  (scenario.value, rp.value,
                                   adm_div.name, adm_code, value))
                            self._store_risk_value(
                                conn, table_name, scenario, rp, risk,
                                risk_analysis, region, adm_div, value)
                    elif app.name == RiskApp.APP_COST_BENEFIT:
                        cell_obj = sheet.cell(rp_idx + 1, 0)
                        if cell_obj.value:
                            adm_div = AdministrativeDivision.objects.get(
                                name=region)
                            value = sheet.cell_value(rp_idx + 1, 1)
                            print('[%s] (%s) %s / %s' %
                                  (scenario.value, rp.value, adm_div.name,
                                   value))
                            self._store_risk_value(
                                conn, table_name, scenario, rp, risk,
                                risk_analysis, region, adm_div, value)

                    # Finished Import: Commit on DB
                    conn.commit()
                except Exception:
                    try:
                        conn.rollback()
                    except Exception:
                        # Best effort: a failed rollback must not hide the
                        # original error printed below.
                        traceback.print_exc()

                    traceback.print_exc()
                finally:
                    conn.close()

        # Import or Update Metadata if Metadata File has been specified/found
        if excel_metadata_file:
            call_command('importriskmetadata',
                         region=region.name,
                         excel_file=excel_metadata_file,
                         risk_analysis=risk_analysis,
                         risk_app=[app.name])
            risk.metadata_file = excel_metadata_file

        # Finalize
        risk.data_file = excel_file
        if commit:
            risk.save()

        return risk_analysis

    def _store_risk_value(self, conn, table_name, scenario, rp, risk,
                          risk_analysis, region, adm_div, value):
        """Insert one value row and link the division to the risk analysis."""
        db_values = {
            'table': table_name,  # From rp.layer
            'the_geom': geos.fromstr(adm_div.geom, srid=adm_div.srid),
            'dim1': scenario.value,
            'dim1_order': scenario.order,
            'dim2': rp.value,
            'dim2_order': rp.order,
            'dim3': None,
            'dim4': None,
            'dim5': None,
            'risk_analysis': risk_analysis,
            'hazard_type': risk.hazard_type.mnemonic,
            # Single quotes are doubled because the name ends up inside an
            # SQL string literal.
            'admin': adm_div.name.encode('utf-8').replace("'", "''"),
            'adm_code': adm_div.code,
            'region': region.name,
            'value': value
        }
        self.insert_db(conn, db_values)
        risk_adm = RiskAnalysisAdministrativeDivisionAssociation.\
            objects.\
            filter(riskanalysis=risk, administrativedivision=adm_div)
        if len(risk_adm) == 0:
            RiskAnalysisAdministrativeDivisionAssociation.\
                objects.\
                create(riskanalysis=risk, administrativedivision=adm_div)
示例#33
0
    workbook = xlrd.open_workbook(filename=args.excel_file)
    print workbook.sheet_names()
    worksheet = workbook.sheet_by_name(u"Sheet 1")
    num_rows = worksheet.nrows - 1
    curr_row = -1

    matches = []

    while curr_row < num_rows:
        curr_row += 1
        row = worksheet.row(curr_row)

        n0 = row[0]
        eid0 = row[2]
        cell_type_str = ctype_text.get(n0.ctype, "unknown type")
        if cell_type_str == "number":
            matches.append([int(n0.value), eid0.value])

        n0 = row[5]
        eid0 = row[7]
        cell_type_str = ctype_text.get(n0.ctype, "unknown type")
        if cell_type_str == "number":
            matches.append([int(n0.value), eid0.value])

    # Connect to AgBase
    ab = AgBase()
    ab.set_logging_on(True)
    user = ab.connect(args.user, args.passwd, args.server)
    if user is None:
        print ("ERROR: Login failed.")
示例#34
0
def extract_data(table, xl_workbook, spreadsheet_type, df):
    """Copy one table of a worksheet into ``df`` and attach its row descriptions.

    :param table: description of the table; the attributes read here are
        ``sheet_name``, ``rows``, ``cols``, ``row_descriptions``,
        ``indentation_levels``, ``table_type``, ``columns_with_indentation``,
        ``merged_meta_data_row_headings``, ``row_titles`` and
        ``top_header_row``
    :param xl_workbook: xlrd workbook; ``xf_list`` is read for wide tables,
        which presumably requires it to be opened with formatting info
        (TODO confirm against the caller)
    :param spreadsheet_type: when equal to "Time series", the 'Series ID'
        heading column of a wide table is renamed to 'Date'
    :param df: pandas DataFrame that receives the cell values
    :return: the resulting DataFrame (a new object, not ``df`` itself)
    """

    sheet = xl_workbook.sheet_by_name(table.sheet_name)

    # Add data to the dataframe, shifted so the table's top-left cell lands
    # on label (0, 0)
    first_row = min(table.rows)
    first_col = min(table.cols)

    for r in table.rows:
        for c in table.cols:
            cell = sheet.row(r)[c]
            df.loc[r - first_row, c - first_col] = cell.value

    # Reset row indices so they go 0, 1, 2, ... n
    df = df.reset_index(drop=True)
    # Add row descriptions to the dataframe
    row_descriptions = table.row_descriptions  # NOTE(review): unused local
    indentation_levels = table.indentation_levels

    if table.table_type == "long format":
        # Long tables take all their descriptions from the merged row
        # headings.
        row_headings = merged_data_row_headings_function(xl_workbook, sheet_name=table.sheet_name,
                                                         merged_data_rows=table.merged_meta_data_row_headings,
                                                         data_rows=table.rows,
                                                         row_descriptions=table.row_descriptions,
                                                         row_titles=table.row_titles,
                                                         top_header_row=table.top_header_row)
        df = df.join(row_headings)

    if table.table_type == "wide format":
        # Insert empty columns into dataframe, where the descriptions will go
        if table.columns_with_indentation:
            for c in table.columns_with_indentation:
                for idx, i in enumerate(indentation_levels[c]):
                    df.insert(loc=idx, column='descriptor_col_' + str(c) + str(i), value=['' for j in range(df.shape[0])])

            row = 0
            top_cell_row = min(table.rows)

            for r in table.rows:
                column = 0
                for c in table.columns_with_indentation:
                    for i in indentation_levels[c]:
                        # Walk upwards from the data row until a cell whose
                        # indent level is not deeper than ``i`` is found.
                        cell_row = r
                        cell = sheet.row(cell_row)[c]
                        indentation_cell = int(xl_workbook.xf_list[sheet.cell_xf_index(cell_row, c)].alignment.indent_level)
                        while indentation_cell > i or cell.value is None:
                            cell_row -= 1
                            if cell_row < 1:
                                # Stop before row 0; ``cell`` keeps the last
                                # cell that was read.
                                break
                            cell = sheet.row(cell_row)[c]
                            indentation_cell = int(xl_workbook.xf_list[sheet.cell_xf_index(cell_row, c)].alignment.indent_level)
                            if cell_row < top_cell_row:
                                top_cell_row = cell_row
                        if indentation_cell == i:
                            if ctype_text.get(cell.ctype, 'unknown type') == "xldate":
                                # The value is turned into seconds for
                                # ``pd.to_datetime``; 25569 is presumably the
                                # Excel serial number of 1970-01-01 (confirm).
                                df.loc[row, df.columns[column]] = pd.to_datetime((cell.value - 25569) * 86400.0,
                                                                                 unit='s').strftime('%d/%m/%Y')
                            else:
                                df.loc[row, df.columns[column]] = cell.value
                        column += 1
                    # NOTE(review): ``row`` advances once per indentation
                    # column, not once per data row; with more than one
                    # column in ``columns_with_indentation`` the target row
                    # drifts -- confirm this is intended.
                    row += 1
        # NOTE(review): evaluated even when ``columns_with_indentation`` is
        # falsy; this raises TypeError if that attribute can be None.
        other_columns = set(i for i in table.row_descriptions if i not in table.columns_with_indentation)

        if other_columns:
            # Description columns without indentation come from the merged
            # row headings instead.
            row_headings = merged_data_row_headings_function(xl_workbook, sheet_name=table.sheet_name,
                                                             merged_data_rows=table.merged_meta_data_row_headings,
                                                             data_rows=table.rows,
                                                             row_descriptions=other_columns,
                                                             row_titles=table.row_titles,
                                                             top_header_row=table.top_header_row)
            if spreadsheet_type == "Time series":
                row_headings.rename(columns={'Series ID': 'Date'}, inplace=True)
            df = df.join(row_headings)

    return df
示例#35
0
    def transPerFile(self, infile, outfile):
        """Convert the first sheet of one Excel export into an XML file.

        Called on a per file basis from transformAll.  The start of the file
        name (so / pk / mm / aus, case-insensitive) selects the record
        element and the column holding its id.  Every data row with a
        non-empty id becomes one record element whose children are the
        row's non-empty cells, named after the column headers and sorted
        by name.

        :param infile: path of the Excel file to read
        :param outfile: path of the XML file to write
        :raises ValueError: when a sanitized column header starts with a
            digit
        """

        self.mtime = os.path.getmtime(infile)
        wb = xlrd.open_workbook(filename=infile, on_demand=True)
        sheet = wb.sheet_by_index(0)

        root = ET.Element(
            "museumPlusExport",
            attrib={
                "version": "2.0",
                "level": "dirty",
            },
        )
        tree = ET.ElementTree(root)

        columns = [sheet.cell(0, c).value for c in range(sheet.ncols)]

        base = os.path.basename(infile)

        # invalid xml characters: will be stripped
        remove_re = re.compile(u"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")

        # File name prefix -> (record element, id attribute / id column).
        record_kinds = (
            ("so", "sammlungsobjekt", "objId"),
            ("pk", "personKörperschaft", "kueId"),
            ("mm", "multimediaobjekt", "mulId"),
            ("aus", "ausstellung", "ausId"),
        )
        record_tag = attrib = None
        for prefix, kind_tag, kind_attrib in record_kinds:
            if re.match(prefix, base, flags=re.I):
                record_tag, attrib = kind_tag, kind_attrib
                break

        # Same export date for every record: the file's modification time.
        t = datetime.fromtimestamp(
            self.mtime, timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        for r in range(1, sheet.nrows):  # leave out column headers
            if record_tag is None:
                # Only fatal once there is an actual data row to convert.
                print("Error: Unknown file %s" % infile)
                sys.exit(1)

            index = sheet.cell(r, columns.index(attrib)).value
            if index:
                index = str(int(index))

            if index == "":  # Dont include rows without meaningful index
                continue

            doc = ET.SubElement(root,
                                record_tag,
                                attrib={
                                    attrib: index,
                                    "exportdatum": str(t)
                                })
            print("INDEX: %s" % index)  # should this become verbose?

            row_dict = {}

            for c in range(sheet.ncols):
                cell = sheet.cell(r, c)
                cellTypeStr = ctype_text.get(cell.ctype, "unknown type")
                tag = columns[c]
                # I want lowercase initial for all element names
                tag = tag[0].lower() + tag[1:]
                # xml spec: strip illegal chars for elements
                tag = re.sub(r"\W|&|<|>|:", "", tag)
                if re.search(r"\A[0-9]", tag):
                    raise ValueError(
                        "XML spec doesn't allow elements to begin with numbers"
                    )

                if cellTypeStr == "empty":  # write non-empty elements only
                    continue

                # type conversions
                if cellTypeStr == "number":
                    val = int(cell.value)
                elif cellTypeStr == "xldate":
                    val = xlrd.xldate.xldate_as_datetime(cell.value, 0)
                elif cellTypeStr == "text":
                    # escaping here would lead to double escape on write
                    val = remove_re.sub("", cell.value)  # rm illegal xml char
                else:
                    # bool / error / blank cells: use the cell's own value
                    # rather than whatever the previous cell left in ``val``.
                    val = cell.value

                # rm leading and trailing whitespace; turn into str
                val = str(val).strip()
                if tag != attrib and val != "":
                    row_dict[tag] = val

            for tag in sorted(row_dict.keys()):
                ET.SubElement(doc, tag).text = row_dict[tag]

        self.indent(root)

        tree.write(outfile, encoding="UTF-8", xml_declaration=True)
示例#36
0
def merged_data_row_headings_function(xl_workbook, sheet_name,
                                      merged_data_rows, data_rows,
                                      row_descriptions, row_titles,
                                      top_header_row):
    """Build a DataFrame of row headings, expanding merged cells.

    Each row-description column is walked top to bottom. A merged range is
    expanded so that every row it spans receives the value of the range's
    top-left cell. Descriptions found on rows that are not data rows are
    collected separately and forward-filled onto the data rows below them.

    Args:
        xl_workbook: Workbook object providing ``sheet_by_name``.
        sheet_name: Name of the sheet to read.
        merged_data_rows: List of 4-tuples in the format used by xlrd's
            ``merged_cells``; read here as
            ``(row_start, row_end, col_start, col_end)`` with exclusive ends.
        data_rows: Set of sheet row indices that hold data (``.union``,
            ``min`` and ``max`` are called on it).
        row_descriptions: Iterable of column indices holding row descriptions.
        row_titles: Indexable by column index; gives the DataFrame column
            name used for that description column.
        top_header_row: Row index of the header; rows after it and before
            ``max(data_rows)`` that are not data rows are also scanned.

    Returns:
        A pandas DataFrame with one column per description column, plus the
        joined ``Desc_row<col>`` columns when descriptions were found on
        non-data rows.
    """

    sheet = xl_workbook.sheet_by_name(sheet_name)

    # Rows between the header and the last data row that are not data rows.
    other_rows = set(i for i in range(top_header_row + 1, max(data_rows))
                     if i not in data_rows)
    all_rows = data_rows.union(other_rows)

    # Merged ranges are 4-tuples (a, b, c, d):
    # a, c is the top-left coordinate (row / col, starting with 0) where the merge starts.
    # b, d is the bottom-right coordinate (row / col, starting with 1) where the merge finishes.

    row_headings = pd.DataFrame()
    # Every (row, description column) cell expressed as a one-cell span in the
    # same 4-tuple format, so it can be mixed with the merged ranges below.
    all_positions = []
    for i in all_rows:
        for j in row_descriptions:
            all_positions.append((i, i + 1, j, j + 1))

    # Only merged ranges that start on a data row are treated as row headings.
    merged_meta_data_row_headings = list(
        filter(lambda x: x[0] in data_rows, merged_data_rows))

    # For every merged range, list the tuples (row, end_row, col_start, col_end)
    # for each partial height k; these are used to drop the one-cell spans
    # that a merged range already accounts for.
    all_merged_positions = []
    for i in merged_meta_data_row_headings:
        j = i[0]  # start row
        k = 1
        while j < i[1]:
            for cells in range(i[0], i[0] + k):
                all_merged_positions.append((cells, i[0] + k, i[2], i[3]))
            j += 1
            k += 1

    # Merged ranges plus the one-cell spans not matched above.
    merged_meta_data_extended = copy.copy(merged_meta_data_row_headings)
    merged_meta_data_extended.extend(i for i in all_positions
                                     if i not in all_merged_positions)
    # Needs to be sorted to ensure the descriptions line up properly with the data
    merged_meta_data_extended.sort(key=itemgetter(0, 2))

    # empty_rows maps a running counter (0, 1, 2, ...) to the sheet row index
    # of each non-data row inside the data range; entry 0 -> 0 is a sentinel.
    values = [0]
    values.extend(row for row in range(min(data_rows), max(data_rows))
                  if row not in data_rows)
    keys = [0]
    k = 1
    for v in values:
        keys.append(k)
        k += 1

    empty_rows = dict(zip(keys, values))

    descriptions_in_other_rows = []
    columns_included = set()
    first_data_row = min(data_rows)
    for c in row_descriptions:
        column_heading = row_titles[c]
        for i in merged_meta_data_extended:
            if i[2] == c:
                row_position = i[
                    0] - first_data_row  # row position in df (in other words, the row number)
                # Keep only the non-data rows that lie above the end of this span
                empty_rows_filtered = dict(
                    filter(lambda elem: elem[1] < i[1], empty_rows.items()))
                if empty_rows_filtered:
                    # Shift up by the counter of the lowest such non-data row,
                    # i.e. by the number of non-data rows above this span.
                    row_position = row_position - max(
                        empty_rows_filtered, key=empty_rows_filtered.get)
                    # Record the span's top-left value once per spanned row;
                    # these records are filtered to non-data rows further down.
                    for k in range(i[0], i[1]):
                        cell_value = sheet.row(i[0])[i[2]].value
                        if cell_value != '':
                            descriptions_in_other_rows.append({
                                'Row':
                                i[1],
                                'Col':
                                i[2],
                                'row_position':
                                row_position,
                                'Desc_row' + str(i[2]):
                                cell_value
                            })
                # Write the span's top-left value into every DataFrame row the
                # span covers (only for spans that start on a data row).
                for k in range(i[0], i[1]):
                    cell = sheet.row(i[0])[i[2]]
                    if row_position >= 0 and i[0] in data_rows:
                        if ctype_text.get(cell.ctype,
                                          'unknown type') == "xldate":
                            # Excel serial day -> Unix seconds (25569 is the
                            # serial of 1970-01-01 in the 1900 date system),
                            # then formatted as dd/mm/YYYY.
                            row_headings.loc[row_position,
                                             column_heading] = pd.to_datetime(
                                                 (cell.value - 25569) *
                                                 86400.0,
                                                 unit='s').strftime('%d/%m/%Y')
                        else:
                            row_headings.loc[row_position,
                                             column_heading] = cell.value
                        if cell.value != '' and i[0] in data_rows:
                            columns_included.add(i[2])
                    row_position += 1

    # Keep only descriptions recorded against non-data rows, in columns that
    # did not already contribute a value on a data row.
    if descriptions_in_other_rows:
        descriptions_in_other_rows = list(
            filter(
                lambda i: i['Row'] not in data_rows and i['Col'] not in
                columns_included, descriptions_in_other_rows))
        for d in descriptions_in_other_rows:
            del d['Col']

    if descriptions_in_other_rows:
        # Map each data row's sheet index to its 0-based position among data rows.
        spreadsheet_rows = list(range(min(data_rows), max(data_rows) + 1))
        correspondence = {}
        k = 0
        for i in spreadsheet_rows:
            if i in data_rows:
                correspondence[i] = k
                k += 1
        correspondence = pd.DataFrame(correspondence.items(),
                                      columns=['index', 'New_index'])

        descriptions_in_other_rows = pd.DataFrame(descriptions_in_other_rows)
        descriptions_in_other_rows = descriptions_in_other_rows.sort_values(
            by=['Row'])

        # Index by sheet row, then forward-fill so each description carries
        # down to the rows beneath it.
        descriptions_in_other_rows.set_index('Row', inplace=True)
        descriptions_in_other_rows = descriptions_in_other_rows.reindex(
            range(max(data_rows) + 1))
        descriptions_in_other_rows.ffill(axis=0, inplace=True)
        descriptions_in_other_rows['index'] = descriptions_in_other_rows.index
        descriptions_in_other_rows = descriptions_in_other_rows.merge(
            correspondence, on='index', how='left')

        descriptions_in_other_rows.drop(['row_position', 'index'],
                                        axis=1,
                                        inplace=True)

        # Re-key by position among data rows so the join lines up with row_headings.
        descriptions_in_other_rows.set_index('New_index', inplace=True)
        descriptions_in_other_rows.rename_axis(None, inplace=True)
        row_headings = row_headings.join(descriptions_in_other_rows)

    return row_headings
示例#37
0
def show_column_names(xl_sheet):
    """Print the index, value and cell type of each cell in the first row.

    Output starts with a three-line banner (rule, column legend, rule)
    followed by one ``(index) value [type]`` line per cell.

    Args:
        xl_sheet: Sheet object providing ``row(0)``; each returned cell must
            expose ``ctype`` and ``value``.
    """
    rule = 60 * '-'
    # The legend sits on its own line between the two rules; the line breaks
    # had been lost from the literal, leaving stray "n" characters instead.
    print(rule + '\n(Column #) value [type]\n' + rule)
    for idx, cell_obj in enumerate(xl_sheet.row(0)):  # 1st row
        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
        print('(%s) %s [%s]' % (idx, cell_obj.value, cell_type_str))
示例#38
0
            lastname = sheet.row(row)[0].value.strip().title()
        else:
            name = sheet.row(row)[0].value.title()
            if ',' in name:
                names = name.split(',')
            if '/' in name:
                names = name.split('/')
            else:
                name = name.split(' ')

            lastname = names[0].strip()
            firstname = names[1].strip()
            print lastname, firstname

        element = sheet.row(row)[2]
        if ctype_text.get(element.ctype) == 'xldate':
            bdate = xldate_to_datetime(element.value)
        else:
            bdate = element.value.strip().decode('utf-8')
        print bdate

        element = sheet.row(row)[4]
        if ctype_text.get(element.ctype) == 'number':
            audio_clip = int(element.value)
        else:
            audio_clip = element.value
        print 'audio_clip', audio_clip

        element = sheet.row(row)[5]
        program = ''
        if element.value != '':
示例#39
0
# Open the worksheet to inspect. A sheet can be fetched in two ways:
#   by position: xl_wrkbk.sheet_by_index(0)
#   by name:     xl_wrkbk.sheet_by_name('Sheet1')
xl_sheet = xl_wrkbk.sheet_by_index(0)

# Pull one row out of the sheet.
# NOTE: rows and columns are both indexed from 0 (zero).
a_row = xl_sheet.row(0)

# Dump each cell of that row together with its type.
print('(Column #) type:value')
for idx, cell_obj in enumerate(a_row):
    cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
    print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value))

# Walk every cell of the sheet, row by row.
num_cols = xl_sheet.ncols  # number of columns

for row_idx in range(xl_sheet.nrows):
    print(40 * '-')
    print('Row: %s' % row_idx)

    for col_idx in range(num_cols):
        # Fetch the cell object at (row, column).
        cell_obj = xl_sheet.cell(row_idx, col_idx)
示例#40
0
def callback(ch, method, properties, body):
    """Convert an uploaded card statement and publish the converted file.

    ``body`` is a JSON document with the keys ``base64buffer`` (the
    base64-encoded spreadsheet), ``fileName``, ``userId`` and ``curr_date``.
    The spreadsheet is normalised to .xlsx, rewritten into a five-column
    sheet (Isracard layout when cell A1 is empty, Visa layout otherwise),
    saved to the hard-coded output directory, and sent through
    ``publisher.sender`` as a JSON message carrying the base64 of the
    converted file.

    Differences from the previous implementation:
      * The outgoing message is built after the converted file has been
        encoded. It used to be built first, reading ``base64_message``
        before assignment, so every call failed with UnboundLocalError.
      * A payload without ``base64buffer`` now reaches the
        "Cannot parsing json from rabbit" branch instead of raising KeyError.
      * The extension is taken from the text after the last dot, so names
        with no dot or with several dots are handled.
      * Only ``Exception`` is caught, and the error is printed.
      * Empty output rows are removed bottom-up in one pass.
      * Both input formats share one code path, so the Isracard output file
        prefix is now the same for both.

    Args:
        ch, method, properties: Unused; presumably supplied by the message
            consumer that invokes this callback.
        body: JSON text (str or bytes) as described above.

    Raises:
        ValueError: ``body`` is not valid JSON (from ``json.loads``).
        KeyError: ``userId``, ``fileName`` or ``curr_date`` is missing.
    """
    print(" [x] Received %r" % body)
    excel_dict = json.loads(body)

    if "base64buffer" not in excel_dict:
        print('Cannot parsing json from rabbit')
        return

    userId = excel_dict["userId"]
    filename = excel_dict["fileName"]
    curr_date = excel_dict["curr_date"]
    print(userId)
    print(filename)

    # Text after the last dot; a name without any dot has no extension.
    _, dot, extension = str(filename).rpartition(".")
    if not dot or extension not in ('xls', 'xlsx'):
        return
    print('excel format : ' + extension)

    try:
        decoded_data = base64.b64decode(excel_dict["base64buffer"])
        sheet = _callback_load_source_sheet(extension, decoded_data)
        mr = sheet.max_row

        # An empty top-left cell marks the Isracard layout.
        if sheet.cell(row=1, column=1).value is None:
            print("ISRACARD")
            label = 'isracard'
            converted = _callback_convert_isracard(sheet, mr)
        else:
            print("VISA")
            label = 'visa'
            converted = _callback_convert_visa(sheet, mr)

        _callback_publish(converted, label, userId, curr_date, filename)
    except Exception as err:
        # Best effort: one bad upload must not take the consumer down.
        print('cannot parsing xlsx file, the file is corrupted')
        print(repr(err))


def _callback_load_source_sheet(extension, decoded_data):
    """Write the upload to disk as .xlsx and return the reloaded active sheet."""
    if extension == 'xls':
        # Copy every cell of the first .xls sheet into a new openpyxl workbook.
        staging = Workbook()
        staging_ws = staging.active
        xl_workbook = open_workbook(file_contents=decoded_data,
                                    on_demand=True)
        xl_sheet = xl_workbook.sheet_by_index(0)
        for row_idx in range(xl_sheet.nrows):
            for col_idx in range(xl_sheet.ncols):
                staging_ws.cell(row=row_idx + 1, column=col_idx + 1).value = \
                    xl_sheet.cell(row_idx, col_idx).value
        path = 'c:\\OM\\output-xls-' + str(uuid.uuid4()) + '.xlsx'
        staging.save(filename=path)
    else:
        path = 'c:\\OM\\output-xlsx-' + str(uuid.uuid4()) + '.xlsx'
        openpyxl.load_workbook(io.BytesIO(decoded_data)).save(filename=path)

    # Reload from disk, as the original flow did, so both formats are read
    # back through the same openpyxl loader.
    return openpyxl.load_workbook(path).active


def _callback_output_workbook():
    """Create the destination workbook with the five-column header in row 1."""
    wbb = Workbook()
    ws = wbb.active
    header = [
        'תאריך עסקה', 'שם בית העסק', 'סכום עסקה', 'קטגוריה',
        'מספר כרטיס'
    ]
    for column, title in enumerate(header, start=1):
        ws.cell(row=1, column=column, value=title)
    return wbb, ws


def _callback_convert_isracard(sheet, mr):
    """Convert an Isracard-layout sheet; return the destination workbook."""
    sheet.delete_cols(3, 2)
    wbb, ws = _callback_output_workbook()

    # A column-A cell containing a token that starts with four digits opens
    # a new card section; remember its row and its card number(s).
    card_pattern = re.compile('[0-9]{4}')
    section_rows = []
    section_cards = []
    for i in range(4, mr):
        tokens = str(sheet.cell(row=i, column=1).value).split(" ")
        cards = [token for token in tokens if card_pattern.match(token)]
        if cards:
            section_rows.append(i)
            section_cards.append(', '.join(cards))
    # Sentinel so the last section extends to the end of the sheet.
    section_rows.append(mr + 5)

    # Copy each section's rows (fixed offsets skip the rows around a section
    # header) and tag every copied row with the section's card number.
    for a, card in enumerate(section_cards):
        for i in range(section_rows[a] + 3, section_rows[a + 1] - 2):
            for j in range(1, 4):
                ws.cell(row=i - 5, column=j).value = \
                    sheet.cell(row=i, column=j).value
            ws.cell(row=i - 5, column=5).value = card

    # Drop rows with an empty first column. Going bottom-up means a deletion
    # never shifts a row that still has to be examined.
    for i in range(mr - 1, 0, -1):
        if ws.cell(row=i, column=1).value is None:
            ws.delete_rows(i)
    return wbb


def _callback_convert_visa(sheet, mr):
    """Convert a Visa-layout sheet; return the destination workbook."""
    sheet.delete_cols(3)
    # The card identifier is the fourth space-separated word of the second
    # comma-separated field of cell A2.
    card = str(sheet.cell(row=2, column=1).value).split(",")[1].split(" ")[3]
    print(card)

    wbb, ws = _callback_output_workbook()
    for i in range(4, mr):
        for j in range(1, 4):
            ws.cell(row=i - 2, column=j).value = \
                sheet.cell(row=i, column=j).value
        ws.cell(row=i - 2, column=5).value = card
    return wbb


def _callback_publish(wbb, label, userId, curr_date, filename):
    """Save the converted workbook and publish it as a base64 JSON message."""
    path = 'c:\\OM\\output-' + label + '-' + str(uuid.uuid4()) + '.xlsx'
    wbb.save(filename=path)

    with open(path, 'rb') as binary_file:
        base64_message = base64.b64encode(binary_file.read()).decode('utf-8')

    print("Before publish to yogev")
    message = json.dumps({
        "userid": userId,
        "base64buffer": base64_message,
        "date": curr_date,
        "fileName": filename
    })
    publisher.sender(message)
    print("send message to sender function")
示例#41
0
def merged_data_function(xl_workbook,
                         sheet_name,
                         merged_data_cols,
                         data_cols,
                         data_rows,
                         extra_rows,
                         last_row_in_sheet,
                         spreadsheet_type,
                         column_header_locations,
                         column_position=1):
    """Build a DataFrame of column headings, expanding merged cells.

    Each header row is walked left to right. A merged range is expanded so
    that every data column it spans receives the value of the range's
    top-left cell. Merged ranges that repeat the same column span on several
    rows are treated as subheadings and left out. The result has one row per
    data column and one column per header row, plus one ``Col_desc_<n>``
    column per entry of ``extra_rows``.

    Args:
        xl_workbook: Workbook object providing ``sheet_by_name``.
        sheet_name: Name of the sheet to read.
        merged_data_cols: List of 4-tuples in the format used by xlrd's
            ``merged_cells``; read here as
            ``(row_start, row_end, col_start, col_end)`` with exclusive ends.
        data_cols: Collection of column indices that hold data.
        data_rows: Collection of row indices that hold data.
        extra_rows: Row indices whose column-A value is copied onto every
            data column as an additional description.
        last_row_in_sheet: Index of the last row to consider.
        spreadsheet_type: When equal to "Census TableBuilder", heading names
            are read from column A of each header row.
        column_header_locations: Set of row indices holding column headers
            (``.union`` is called on it).
        column_position: Starting number for the ``Col_desc_<n>`` columns.

    Returns:
        A pandas DataFrame indexed by data-column position.
    """

    sheet = xl_workbook.sheet_by_name(sheet_name)
    last_row = max(data_rows)
    # Non-data rows below the last header row, up to the end of the sheet.
    other_rows = set(
        i for i in range(last_row_in_sheet + 1)
        if i not in data_rows and i > max(column_header_locations))
    all_rows = column_header_locations.union(other_rows)

    column_headings = pd.DataFrame()
    # Merged ranges that start on a header row or on one of those other rows.
    merged_meta_data = list(
        filter(lambda x: x[0] in all_rows, merged_data_cols))

    # Get the merged items that have the same column dimensions. These are understood to be subheadings.
    # other_rows is recomputed here: this time only up to the last data row,
    # and including the last header row itself.
    other_rows = set(
        i for i in range(last_row)
        if i not in data_rows and i > max(column_header_locations) - 1)
    all_rows = column_header_locations.union(other_rows)

    merged_meta_data_subheadings_potential = list(
        filter(lambda x: x[0] in other_rows, merged_data_cols))

    # Column spans (col_start, col_end) that occur more than once among the
    # candidates mark the subheadings.
    merged_meta_data_last_2_elements = [
        el[2:] for el in merged_meta_data_subheadings_potential
    ]
    duplicates = list(
        set([
            ele for ele in merged_meta_data_last_2_elements
            if merged_meta_data_last_2_elements.count(ele) > 1
        ]))

    merged_meta_data_subheadings = []
    for i in merged_meta_data_subheadings_potential:
        for j in duplicates:
            if i[2] == j[0] and i[3] == j[1]:
                merged_meta_data_subheadings.append(i)

    # duplicate_rows = set(el[0] for el in merged_meta_data_subheadings)
    #
    # merged_meta_data_subheadings = []
    # for i in merged_meta_data:
    #     for j in duplicates:
    #         if i[2] == j[0] and i[3] == j[1]:
    #             merged_meta_data_subheadings.append(i)

    # Remove the subheading rows
    merged_meta_data = [
        x for x in merged_meta_data if x not in merged_meta_data_subheadings
    ]
    subheading_rows = [i[0] for i in merged_meta_data_subheadings]
    rows_not_subheadings = [
        x for x in column_header_locations if x not in subheading_rows
    ]

    # Find how the merged data relates to the columns
    first_col = min(data_cols)
    # Every (header row, data column) cell expressed as a one-cell span in the
    # same 4-tuple format as the merged ranges.
    all_positions = []
    for i in rows_not_subheadings:
        for j in data_cols:
            all_positions.append((i, i + 1, j, j + 1))

    # For every merged range, list the tuples (row_start, row_end, col, end_col)
    # for each partial width k; these are used to drop the one-cell spans
    # that a merged range already accounts for.
    all_merged_positions = []
    for i in merged_meta_data:
        j = i[2]  # start column
        k = 1
        while j < i[3]:
            for cells in range(i[2], i[2] + k):
                all_merged_positions.append((i[0], i[1], cells, i[2] + k))
            j += 1
            k += 1

    # Merged ranges plus the one-cell spans not matched above.
    merged_meta_data_extended = copy.copy(merged_meta_data)
    merged_meta_data_extended.extend(i for i in all_positions
                                     if i not in all_merged_positions)
    # Needs to be sorted to ensure the descriptions line up properly with the data
    merged_meta_data_extended.sort(key=itemgetter(0, 2))

    # empty_cols maps a running counter (0, 1, 2, ...) to the sheet column
    # index of each non-data column inside the data range; 0 -> 0 is a sentinel.
    values = [0]
    values.extend(col for col in range(min(data_cols), max(data_cols))
                  if col not in data_cols)
    keys = [0]
    k = 1
    for v in values:
        keys.append(k)
        k += 1

    empty_cols = dict(zip(keys, values))

    # Merged ranges are 4-tuples (a, b, c, d):
    # a, c is the top-left coordinate (row / col, starting with 0) where the merge starts.
    # b, d is the bottom-right coordinate (row / col, starting with 1) where the merge finishes.
    column_titles = {}
    if spreadsheet_type == "Census TableBuilder":
        # Use the text in column A of each header row as the heading name,
        # falling back to a numbered placeholder when that cell is empty.
        i = 1
        for r in column_header_locations:
            column_titles[r] = sheet.cell_value(rowx=r, colx=0)
            if column_titles[r] == '':
                column_titles[r] = "Column_description_title_" + str(i)
                i += 1
    else:
        for i, r in enumerate(rows_not_subheadings):
            column_titles[r] = "Row_description_title_" + str(i)

    for r in rows_not_subheadings:
        column_heading = column_titles[r]
        for i in merged_meta_data_extended:
            if i[0] == r:
                row_position = i[
                    2] - first_col  # row position in df (in other words, the column number)
                # Keep only the non-data columns that lie left of the end of this span
                empty_cols_filtered = dict(
                    filter(lambda elem: elem[1] < i[3], empty_cols.items()))
                if empty_cols_filtered:
                    # Shift left by the counter of the right-most such column,
                    # i.e. by the number of non-data columns before this span.
                    row_position = row_position - max(
                        empty_cols_filtered, key=empty_cols_filtered.get)
                # Write the span's top-left value once per spanned column.
                for k in range(i[2], i[3]):
                    cell = sheet.row(i[0])[i[2]]
                    if row_position >= 0:
                        if ctype_text.get(cell.ctype,
                                          'unknown type') == "xldate":
                            # Excel serial day -> Unix seconds (25569 is the
                            # serial of 1970-01-01 in the 1900 date system),
                            # then formatted as dd/mm/YYYY.
                            column_headings.loc[row_position, column_heading] = pd.to_datetime((cell.value - 25569) *
                                                                                               86400.0, unit='s').\
                                strftime('%d/%m/%Y')
                        else:
                            column_headings.loc[row_position,
                                                column_heading] = cell.value
                    row_position += 1

    columns_to_evaluate = [0]  # Assume the extra row info is all in column A
    # Each extra row contributes one 'Col_desc_<n>' column whose single value
    # is repeated for every data column.
    for j in extra_rows:
        column_heading = 'Col_desc_' + str(column_position)
        for i in columns_to_evaluate:
            cell = sheet.row(j)[i]
            for row_position in range(0, len(data_cols)):
                if ctype_text.get(cell.ctype, 'unknown type') == "xldate":
                    column_headings.loc[row_position, column_heading] = pd.to_datetime((cell.value - 25569) *
                                                                                       86400.0, unit='s'). \
                        strftime('%d/%m/%Y')
                else:
                    column_headings.loc[row_position,
                                        column_heading] = cell.value
        column_position += 1

    return column_headings
示例#42
0
def locate_data(xl_workbook, data_sheets, allowed_blank_rows, data_type):
    """Locate the numeric data cells in each sheet and group them into tables.

    A cell counts as data when it is numeric, is not in a column already
    identified as a date column, and is either not bold or sits in a row
    whose first numeric cell has the label "Total" immediately to its left.
    Once a table has been started, bold numeric cells are accepted as well.
    A table ends after ``allowed_blank_rows`` consecutive blank rows; later
    data starts a new table.

    Changes from the previous version: ``left_of_data`` is reset for every
    row, so it can no longer be read before assignment (bold number in
    column 0) or carry a label over from an earlier row; ``start_row`` is
    reset for every sheet instead of leaking from the previous sheet when
    "Series ID" is absent.

    Args:
        xl_workbook: Workbook providing ``sheet_by_name``, ``xf_list`` and
            ``font_list`` (font weights are read, so formatting information
            must be available).
        data_sheets: Iterable of sheet names to scan.
        allowed_blank_rows: Number of consecutive blank rows that closes the
            current table.
        data_type: When equal to "Time series", scanning starts on the row
            after the first row containing a "Series ID" cell.

    Returns:
        List of ``TableData`` objects, one per table found, each populated
        through ``add_row`` / ``add_col`` for every data cell.
    """
    date_labels = ["Year", "year", "Years", "Date", "Month", "Day"]

    tables = []
    table_number = -1

    for s in data_sheets:
        sheet = xl_workbook.sheet_by_name(s)
        found_table = False
        found_data = False
        looking_for_multiple_tables = False
        blank_row = False
        blank_row_count = 0
        date_cols = []

        # Time-series sheets carry metadata above a "Series ID" row; the data
        # starts on the row after it. Default to the top of the sheet.
        start_row = 0
        if data_type == "Time series":
            for i in range(sheet.nrows):
                if any(cell_obj.value == "Series ID"
                       for cell_obj in sheet.row(i)):
                    start_row = i + 1
                    break

        # Find data
        for i in range(start_row, sheet.nrows):
            row = sheet.row(i)
            if blank_row:
                blank_row_count += 1
            else:
                blank_row_count = 0
            if blank_row_count >= allowed_blank_rows:
                # Enough blank rows: close the current table and start over.
                found_data = False
                found_table = False
                blank_row = False
                date_cols = []
                continue
            if found_data:
                looking_for_multiple_tables = True
                blank_row = True  # Temporary assignment, will be made false if something is found in the row
                blank_row_count = 0
            found_data = False
            # Label left of this row's first candidate data cell; None until seen.
            left_of_data = None
            for idx, cell_obj in enumerate(row):
                if isinstance(cell_obj.value, str):
                    if not found_data:
                        if cell_obj.value in date_labels:
                            date_cols.append(idx)
                            continue
                # Return the cell 'weight' (700 is bold, 400 is 'normal')
                rd_xf = xl_workbook.xf_list[sheet.cell_xf_index(i, idx)]
                cell_font = xl_workbook.font_list[rd_xf.font_index].weight
                cell_type = ctype_text.get(cell_obj.ctype, 'unknown type')

                # Check if it is a total row (p.s. this is very specific, sometimes total row is at the top and bold
                if cell_type == "number" and not found_data and idx != 0:  # Top left data cell
                    left_of_data = sheet.row(i)[idx - 1].value
                if cell_type == "number" and (
                        cell_font != 700
                        or left_of_data == "Total") and idx not in date_cols:
                    # Store info on location of data column
                    found_data = True
                    if not found_table:
                        tables.append(TableData(sheet_name=sheet.name))
                        table_number += 1
                        found_table = True
                    tables[table_number].add_row(i)
                    tables[table_number].add_col(idx)
                # Sometimes there is a 'total' row that is bold. Let's not skip it.
                # If the table has been started, allow bold rows.
                # p.s this might be redundant if the total row is always called 'Total' as per above if statement
                if found_table and cell_type == "number" and cell_font == 700:
                    found_data = True
                    tables[table_number].add_row(i)
                    tables[table_number].add_col(idx)
                if cell_type in ["number", "text"
                                 ] and looking_for_multiple_tables is True:
                    blank_row = False
                    blank_row_count = 0

    return tables
示例#43
0
def describe_col_headings(xl_workbook, sheet_name, data_rows, data_cols,
                          row_descriptions, top_row, top_header_row,
                          row_descriptions_header_row, spreadsheet_type,
                          last_row_in_sheet):
    """Locate the column-heading rows and related merged cells of a table.

    There may be several stacked heading rows (units such as $ or %, or
    merged cells spanning several columns).

    ``sheet.merged_cells`` is a list of 4-tuples ``(a, b, c, d)``: ``a``/``c``
    are the 0-based first row/column of the merge, ``b``/``d`` are the
    exclusive end row/column (one past the last merged row/column).

    Args:
        xl_workbook: workbook exposing ``sheet_by_name``, ``xf_list``,
            ``font_list`` and ``format_map`` (xlrd-style API).
        sheet_name: name of the sheet holding the table.
        data_rows: row indices that contain data (must be non-empty).
        data_cols: column indices that contain data (must be non-empty).
        row_descriptions: column indices holding the row descriptions.
        top_row: row excluded from the extra-metadata search ("Data cube").
        top_header_row: row that bounds the extra-metadata search for every
            other spreadsheet type.
        row_descriptions_header_row: optional second row excluded from the
            "Data cube" extra-metadata search.
        spreadsheet_type: ``"Data cube"`` selects the data-cube search; any
            other value selects the generic one.
        last_row_in_sheet: index of the last row of the sheet.

    Returns:
        ``(merged_meta_data, column_header_locations, extra_meta_data)``:
        the merged-cell tuples that start in a heading row (or in a non-data
        row below the last heading) and no further right than the last data
        column; the set of heading row indices; and the set of row indices
        holding extra metadata above the row-description columns.
    """
    first_row = min(data_rows)
    first_col = min(data_cols)
    last_col = max(data_cols)
    sheet = xl_workbook.sheet_by_name(sheet_name)
    all_mergers = sheet.merged_cells
    rows_above_data = range(0, first_row)

    # Start columns of the merged ranges, grouped by the row they start in.
    merge_cols_by_row = {}
    for merger in all_mergers:
        merge_cols_by_row.setdefault(merger[0], set()).add(merger[2])

    def cell_type(cell_obj):
        return ctype_text.get(cell_obj.ctype, 'unknown type')

    def xf_record(r, idx):
        return xl_workbook.xf_list[sheet.cell_xf_index(r, idx)]

    def merge_starts_in(r, columns):
        """True when a merged range starting in row r starts in one of columns."""
        starts = merge_cols_by_row.get(r, ())
        return any(c in starts for c in columns)

    # --- Heading rows: walk upwards from the row just above the data. -------
    column_header_locations = set()
    blank_row = False
    found_a_column_header = False
    for r in reversed(rows_above_data):
        # Stop once a heading has been seen and the previous row ended blank.
        if blank_row and found_a_column_header:
            break
        merge_cols = merge_cols_by_row.get(r, ())
        for idx, cell_obj in enumerate(sheet.row(r)):
            # Only cells above the data columns, or cells that open a merged
            # range, can be column headings.
            if idx < first_col and idx not in merge_cols:
                continue
            if cell_obj.value == "":
                blank_row = True
                continue
            blank_row = False
            # Font weight 700 is bold, 400 is normal.
            cell_font = xl_workbook.font_list[xf_record(r, idx).font_index].weight
            # A heading row needs at least one text or bold cell.
            if cell_type(cell_obj) == "text" or cell_font == 700:
                found_a_column_header = True
                column_header_locations.add(r)
                break

    # --- Extra metadata above the row-description columns. ------------------
    extra_meta_data = set()

    def scan_row_descriptions(rows):
        """Collect rows with text/'General' cells; stop after a blank-ending row."""
        ends_blank = False
        for r in rows:
            if ends_blank:
                break
            for idx, cell_obj in enumerate(sheet.row(r)):
                if idx not in row_descriptions:
                    continue
                cell_format = xl_workbook.format_map[
                    xf_record(r, idx).format_key].format_str
                if cell_obj.value == "":
                    ends_blank = True
                    continue
                ends_blank = False
                if cell_type(cell_obj) == "text" or cell_format == 'General':
                    extra_meta_data.add(r)
                    break

    if spreadsheet_type == "Data cube":
        already_included = {top_row}
        if row_descriptions_header_row:
            already_included.add(row_descriptions_header_row)
        already_included.update(
            r for r in column_header_locations
            if merge_starts_in(r, row_descriptions))
        candidate_rows = [
            r for r in rows_above_data if r not in already_included
        ]
        # Search upwards from the data, then downwards from the sheet top.
        scan_row_descriptions(reversed(candidate_rows))
        scan_row_descriptions(candidate_rows)
    else:
        # Columns strictly left of the right-most row-description column; an
        # empty row_descriptions leaves nothing to evaluate.
        columns_to_evaluate = range(0, max(row_descriptions, default=0))
        already_included = {top_header_row}
        already_included.update(
            r for r in column_header_locations
            if merge_starts_in(r, columns_to_evaluate))
        for r in range(0, top_header_row):
            if r in already_included:
                continue
            for idx, cell_obj in enumerate(sheet.row(r)):
                if (idx in columns_to_evaluate and cell_obj.value != ""
                        and cell_type(cell_obj) == "text"):
                    extra_meta_data.add(r)
                    break

    # --- Merged cells that belong to the headings. --------------------------
    # With no heading row at all, every non-data row qualifies (default -1)
    # instead of max() raising ValueError on the empty set.
    last_header_row = max(column_header_locations, default=-1)
    other_rows = {
        i for i in range(last_row_in_sheet + 1)
        if i not in data_rows and i > last_header_row
    }
    all_rows = column_header_locations | other_rows
    # Keep merges starting in those rows and not right of the data.
    merged_meta_data = [
        merger for merger in all_mergers
        if merger[0] in all_rows and merger[2] <= last_col
    ]

    return merged_meta_data, column_header_locations, extra_meta_data
示例#44
0
    worksheet.write(0, 0, 'Telephone Number')
    worksheet.write(0, 1, 'Review Note')
    siteType = '_rev_Reviewnotes.xlsx'

# Configure column A (the first column) of the output worksheet with the
# value 13 — presumably the column width; confirm against the worksheet API.

worksheet.set_column('A:A', 13)

# Slice column 0 of the input sheet from row index 1 up to 1048576 (the
# Excel row limit). Row index 0 is skipped — presumably a header row.

col = xl_sheet.col_slice(0, 1, 1048576)

# Process the sliced cells one by one.

for (idx, cell_obj) in enumerate(col):
    cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
    cell_obj_str = str(cell_obj)

    # Cut the numbers to their appropriate format.

    # Does a dash, parenthesis, or none of those exist? That will decide the numFormat.

    if '-' in cell_obj_str:
        # All offsets are relative to the FIRST dash in the cell text:
        # 3 characters before it, 3 characters after it, and 4 characters
        # starting 5 positions after it (i.e. the NNN-NNN-NNNN layout).
        firstStart = cell_obj_str.index('-') - 3
        firstEnd = firstStart + 3
        secondStart = cell_obj_str.index('-') + 1
        secondEnd = secondStart + 3
        thirdStart = cell_obj_str.index('-') + 5
        thirdEnd = thirdStart + 4
        # Concatenate the three digit groups without separators.
        teleWho = cell_obj_str[firstStart:firstEnd] + cell_obj_str[
            secondStart:secondEnd] + cell_obj_str[thirdStart:thirdEnd]
示例#45
0
# Directory used for the staged and the normalized workbooks.
_OUTPUT_DIR = 'c:\\OM\\'
# Header row of every normalized statement: transaction date, business name,
# transaction amount, category, card number.
_OUTPUT_HEADER = ('תאריך עסקה', 'שם בית העסק', 'סכום עסקה', 'קטגוריה', 'מספר כרטיס')


def _publish_error(error_type, userId, fileName, curr_date):
    """Send an error notification through ``publisher.sender_err``."""
    message_err = json.dumps({"userId": userId, "errtype": error_type,
                              "curr_date": curr_date, "fileName": fileName})
    publisher.sender_err(message_err)


def _publish_workbook(out_wb, label, userId, fileName, curr_date):
    """Save a normalized workbook and publish it base64-encoded."""
    path = _OUTPUT_DIR + 'output-' + label + '-' + str(uuid.uuid4()) + '.xlsx'
    out_wb.save(filename=path)
    with open(path, 'rb') as binary_file:
        base64_message = base64.b64encode(binary_file.read()).decode('utf-8')
    print("Before publish to yogev")
    message = json.dumps({"userId": userId, "base64buffer": base64_message,
                          "curr_date": curr_date, "fileName": fileName})
    publisher.sender(message)


def _new_output_workbook():
    """Return ``(workbook, active sheet)`` with the header row written."""
    out_wb = Workbook()
    out = out_wb.active
    for column, title in enumerate(_OUTPUT_HEADER, start=1):
        out.cell(row=1, column=column, value=title)
    return out_wb, out


def _copy_rows(src, dst, src_rows, row_shift, card):
    """Copy columns 1-3 of each source row to ``row - row_shift``; card goes to column 5."""
    for i in src_rows:
        for j in range(1, 4):
            dst.cell(row=i - row_shift, column=j).value = src.cell(row=i, column=j).value
        dst.cell(row=i - row_shift, column=5).value = card


def _drop_rows_with_empty_first_column(ws):
    """Delete every row whose first cell is empty.

    Rows are visited bottom-up so a deletion never shifts a row that has not
    been inspected yet.
    """
    for row in range(ws.max_row, 0, -1):
        if ws.cell(row=row, column=1).value is None:
            ws.delete_rows(row)


def _extract_otsar_hahayal(src, max_row):
    """Build the normalized workbook for the "OTSAR HAHAYAL" layout."""
    src.delete_cols(1)
    src.delete_cols(3)
    out_wb, out = _new_output_workbook()
    # After the deletions, row 4 / column 1 holds "<label>:<card> ...".
    card = str(src.cell(row=4, column=1).value).split(":")[1].split(" ")[0]
    _copy_rows(src, out, range(7, max_row - 1), 5, card)
    _drop_rows_with_empty_first_column(out)
    return out_wb


def _extract_isracard(src, max_row):
    """Build the normalized workbook for the "ISRACARD" layout.

    The sheet holds one section per card; a section starts at a row whose
    first cell contains a token beginning with four digits.
    """
    src.delete_cols(3, 2)
    out_wb, out = _new_output_workbook()
    card_token = re.compile('[0-9]{4}')
    sections = []  # (row of the card line, "card, card, ...")
    for i in range(4, max_row):
        tokens = str(src.cell(row=i, column=1).value).split(" ")
        cards = [token for token in tokens if card_token.match(token)]
        if cards:
            sections.append((i, ', '.join(cards)))
    # Sentinel start that closes the last section.
    starts = [row for row, _ in sections] + [max_row + 5]
    for (start, cards), next_start in zip(sections, starts[1:]):
        _copy_rows(src, out, range(start + 3, next_start - 2), 5, cards)
    _drop_rows_with_empty_first_column(out)
    return out_wb


def _extract_visa(src, max_row):
    """Build the normalized workbook for the "VISA" layout."""
    src.delete_cols(3)
    # Row 2 / column 1: the card is the 4th space-separated token after the
    # first comma.
    card = str(src.cell(row=2, column=1).value).split(",")[1].split(" ")[3]
    out_wb, out = _new_output_workbook()
    _copy_rows(src, out, range(4, max_row), 2, card)
    return out_wb


def _normalize_statement(staged_path, userId, fileName, curr_date):
    """Detect the card layout of a staged .xlsx file, normalize and publish it.

    Detection looks at column 1 of the first sheet: an empty row 4 means
    OTSAR HAHAYAL, otherwise an empty row 1 means ISRACARD, otherwise a
    non-empty row 3 means VISA. Anything else is reported as an error.
    """
    src = openpyxl.load_workbook(staged_path).active
    max_row = src.max_row
    first_cell = src.cell(row=1, column=1).value
    third_cell = src.cell(row=3, column=1).value
    fourth_cell = src.cell(row=4, column=1).value

    if fourth_cell is None:
        print("OTSAR HAHAYAL")
        label, out_wb = 'otsar-hahayal', _extract_otsar_hahayal(src, max_row)
    elif first_cell is None:
        print("ISRACARD")
        label, out_wb = 'isracard', _extract_isracard(src, max_row)
    elif third_cell is not None:
        print("VISA")
        label, out_wb = 'visa', _extract_visa(src, max_row)
    else:
        print('cannot parsing file, Doesnt match any card format')
        _publish_error('cannot parsing file, Doesnt match any card format',
                       userId, fileName, curr_date)
        return
    _publish_workbook(out_wb, label, userId, fileName, curr_date)


def _rewrite_xls_dates(xl_sheet, ws, col_idx, datemode, swap_day_month):
    """Replace the date cells of one column by ``dd/mm/yy`` strings.

    NOTE(review): with ``swap_day_month`` the ISO date is parsed as
    year-day-month, i.e. day and month are exchanged, and dates whose day is
    above 12 cannot be parsed and keep their raw value. This reproduces the
    previous behaviour of the first column — confirm it is intended.
    """
    source_format = '%Y-%d-%m' if swap_day_month else '%Y-%m-%d'
    for row_idx in range(xl_sheet.nrows):
        cell_obj = xl_sheet.cell(row_idx, col_idx)
        if cell_obj.ctype != xlrd.XL_CELL_DATE:
            continue
        try:
            iso_date = xlrd.xldate_as_datetime(
                int(cell_obj.value), datemode).date().isoformat()
            parsed = datetime.datetime.strptime(iso_date, source_format)
        except (ValueError, OverflowError):
            # Best effort: leave the raw value copied earlier.
            continue
        ws.cell(row=row_idx + 1, column=col_idx + 1).value = parsed.strftime("%d/%m/%y")


def _stage_xls_as_xlsx(decoded_data):
    """Copy the first sheet of an .xls payload into a new .xlsx file; return its path."""
    xl_workbook = open_workbook(file_contents=decoded_data, on_demand=True)
    xl_sheet = xl_workbook.sheet_by_index(0)
    staged = Workbook()
    ws = staged.active
    for row_idx in range(xl_sheet.nrows):
        for col_idx in range(xl_sheet.ncols):
            ws.cell(row=row_idx + 1, column=col_idx + 1).value = xl_sheet.cell(row_idx, col_idx).value
    # Date conversion is best effort: a failure must not abort the staging.
    for col_idx, swap_day_month, label in ((0, True, "first"), (1, False, "second")):
        try:
            _rewrite_xls_dates(xl_sheet, ws, col_idx, xl_workbook.datemode,
                               swap_day_month)
        except Exception:
            print("can't convert the %s format " % label)
    path = _OUTPUT_DIR + 'output-xls-' + str(uuid.uuid4()) + '.xlsx'
    staged.save(filename=path)
    return path


def _stage_xlsx(decoded_data):
    """Write an .xlsx payload to a staging file; return its path."""
    workbook = openpyxl.load_workbook(io.BytesIO(decoded_data))
    path = _OUTPUT_DIR + 'output-xlsx-' + str(uuid.uuid4()) + '.xlsx'
    workbook.save(filename=path)
    return path


def callback(ch, method, properties, body):
    """Handle one queue message carrying a credit-card statement.

    ``body`` is a JSON object with the keys ``base64buffer`` (the base64
    encoded .xls/.xlsx file), ``fileName``, ``userId`` and ``curr_date``.
    The statement is normalized to a five-column workbook and published with
    ``publisher.sender``; failures are reported with ``publisher.sender_err``.
    ``ch``, ``method`` and ``properties`` are accepted for the consumer
    callback signature and are not used.

    Both file formats share one layout dispatch, and every success message
    uses the ``userId`` key. A body that is not valid JSON, or that lacks
    ``base64buffer``, is reported as a corrupted message instead of raising.
    """
    print(" [x] Received %r" % body)
    try:
        excel_dict = json.loads(body)
    except (TypeError, ValueError):
        excel_dict = None
    if not isinstance(excel_dict, dict):
        excel_dict = {}

    userId = excel_dict.get("userId")
    fileName = excel_dict.get("fileName")
    curr_date = excel_dict.get("curr_date")
    print(userId)
    print(fileName)

    if "base64buffer" not in excel_dict:
        print('Cannot parsing json from rabbit')
        _publish_error('cannot parsing Json Message, the Json Message is corrupted',
                       userId, fileName, curr_date)
        return

    # Use the text after the LAST dot so "report.2020.xls" is handled too.
    _, dot, extension = str(fileName).rpartition(".")
    extension = extension.lower() if dot else ""

    if extension == 'xls':
        print('excel format : xls')
        try:
            decoded_data = base64.b64decode(excel_dict["base64buffer"])
            _normalize_statement(_stage_xls_as_xlsx(decoded_data),
                                 userId, fileName, curr_date)
        except Exception:
            print('cannot parsing to xlsx file, the xls file is corrupted')
            _publish_error('cannot parsing xls file, the file is corrupted',
                           userId, fileName, curr_date)
    elif extension == 'xlsx':
        print('excel format : xlsx')
        try:
            decoded_data = base64.b64decode(excel_dict["base64buffer"])
            _normalize_statement(_stage_xlsx(decoded_data),
                                 userId, fileName, curr_date)
        except Exception:
            print('cannot parsing xlsx file, the file is corrupted')
            _publish_error('cannot parsing xls file, the file is corrupted',
                           userId, fileName, curr_date)
    else:
        # Other extensions were silently ignored before; still nothing is
        # published for them.
        print('unsupported file extension: %r' % extension)
示例#46
0
 def import_xls(self, model, file, header_map=None, extra_columns=None):
     """
     Import the first sheet of a base64-encoded Excel workbook into `model`.

     The workbook is written to a temporary file, converted to CSV text and
     passed to the ``base_import.import`` model.

     To map user column with database column
     - header_map = {'name': 'name', 'document': 'doc_id', }
       Keys are looked up using the lower-cased, stripped header text;
       headers without a (truthy) mapping become False.
     If there is additional fixed column value
     - extra_columns = [('name', 'ABC'), ('id', 10), ]
     If the import file have column id, every non-empty cell of that column
     is replaced by a generated external id ('pabi_xls.<uuid4>'). Otherwise
     a single generated id is added through ``self._add_column``.

     :param model: value passed as ``res_model`` to ``base_import.import``
     :param file: base64-encoded content of the workbook
     :param header_map: optional mapping of header text to field name
     :param extra_columns: optional list of (column name, fixed value)
     :return: list of the distinct external ids generated by this call
     :raises ValidationError: if xlrd cannot open the workbook, if the
         generated CSV is empty, or if the import reports errors
     """
     # b64decode exists on both Python 2 and 3 (decodestring was removed
     # in Python 3.9).
     decoded_data = base64.b64decode(file)
     ftemp = 'temp' + datetime.utcnow().strftime('%H%M%S%f')[:-3]
     xls_path = ftemp + '.xls'
     csv_path = ftemp + '.csv'
     id_index = -1  # -1 means no id
     xml_ids = []
     try:
         with open(xls_path, 'wb') as xls_file:
             xls_file.write(decoded_data)
         try:
             wb = xlrd.open_workbook(xls_path)
         except xlrd.XLRDError:
             raise ValidationError(
                 _('Invalid file format, only .xls or .xlsx file allowed!'))
         st = wb.sheet_by_index(0)
         if st._cell_values:
             # Copy so that later inserts do not mutate the sheet's own row.
             _HEADER_FIELDS = list(st._cell_values[0])
         with open(csv_path, 'wb') as csv_file:
             csv_out = unicodecsv.writer(csv_file,
                                         encoding='utf-8',
                                         quoting=unicodecsv.QUOTE_ALL)
             for nrow in range(st.nrows):
                 row_values = st.row_values(nrow)
                 if nrow == 0:  # Header, find id field
                     header_values = [
                         x.lower().strip() for x in row_values
                     ]
                     if 'id' in header_values:
                         id_index = header_values.index('id')
                     csv_out.writerow(row_values)
                     continue
                 for index, val in enumerate(row_values):
                     cell = st.cell(nrow, index)
                     cell_type = ctype_text.get(cell.ctype, 'unknown type')
                     if id_index == index and val:
                         # UUID replace id
                         xml_id = '%s.%s' % ('pabi_xls', uuid.uuid4())
                         row_values[index] = xml_id
                         xml_ids.append(xml_id)
                     elif cell_type == 'number':
                         if not val:
                             row_values[index] = 0
                         elif not str(val).isdigit():
                             # NOTE(review): int() drops any fractional
                             # part of the cell value - confirm intended.
                             row_values[index] = int(val)
                         else:
                             row_values[index] = val
                     elif cell_type == 'xldate':
                         row_values[index] = self.xldate_to_datetime(
                             cell.value)
                     else:
                         # empty / text / bool / error / blank / unknown
                         row_values[index] = cell.value
                 csv_out.writerow(row_values)
         with open(csv_path, 'r') as csv_file:
             file_txt = csv_file.read()
     finally:
         # Always remove the temporary files, including on failure.
         for path in (xls_path, csv_path):
             if os.path.exists(path):
                 os.unlink(path)
     if not file_txt:
         raise ValidationError(_(str("File Not found.")))
     # Create xml_ids if not already assigned
     if id_index == -1:
         _HEADER_FIELDS.insert(0, 'id')
         xml_id = '%s.%s' % ('pabi_xls', uuid.uuid4())
         file_txt = self._add_column('id', xml_id, file_txt)
         xml_ids.append(xml_id)
     # Map column name
     if header_map:
         # Use the same normalised key for the test and the lookup, so a
         # header with surrounding whitespace no longer raises KeyError.
         _HEADER_FIELDS = [
             header_map.get(x.lower().strip(), False) or False
             for x in _HEADER_FIELDS
         ]
     # Add extra column
     if extra_columns:
         for column in extra_columns:
             _HEADER_FIELDS.insert(0, str(column[0]))
             file_txt = self._add_column(column[0], column[1], file_txt)
     Import = self.env['base_import.import']
     imp = Import.create({
         'res_model': model,
         'file': file_txt,
     })
     [errors] = imp.do(_HEADER_FIELDS, {
         'headers': True,
         'separator': ',',
         'quoting': '"',
         'encoding': 'utf-8'
     })
     if errors:
         raise ValidationError(_(str(errors[0]['message'])))
     return list(set(xml_ids))
示例#47
0
文件: parse.py 项目: strangesast/qr
    pn_header_cols = [(i, m.group('mfgr')) for i, col in enumerate(header_row) if i not in desc_header_cols and (m := header_pn_re.match(col))]

    for price_col, col in enumerate(header_row):
        if 'PRICE' in col or 'ABR' in col:
            break
    else:
        raise Exception('failed to find price column')

    for i in range(1, sheet.nrows):
        doc = {'retailer': retailer, 'file': str(f)[12:],
                'file_modified_at': f.stat().st_mtime}
        row = sheet.row(i)
        doc['description'] = [{'title': header_row[i], 'value': None} for i in desc_header_cols]
        price_c = row[price_col]
        t = ctype_text.get(price_c.ctype)
        if t == 'number':
            price = price_c.value
        elif t == 'empty':
            price = None
        elif t == 'error':
            error_text_from_code.get(price_c.value)
            price = None
        else:
            raise Exception('you f****d up')

        #assert ctype_text.get(price.ctype) != 'number', 'invalid price cell'
        doc['price'] = price
        for j in desc_header_cols:
            cell = row[j]
            t = ctype_text.get(cell.ctype)