示例#1
0
def xls2text_rank():
    """
    rank模型从excel读取队伍数据
    """
    if os.listdir("data_processed_rank/"):
        return
    year = 86
    part = ["W", "E"]
    for p in part:
        while True:
            if year < 10:
                sheets = pyExcelerator.parse_xls(
                    "data/0" + str(year) + p + ".xls", "utf-8")
                f = open("data_processed_rank/0" + str(year) + p, "a+")
            else:
                sheets = pyExcelerator.parse_xls(
                    "data/" + str(year) + p + ".xls", "utf-8")
                f = open("data_processed_rank/" + str(year) + p, "a+")
            data = ""
            for line in sheets:
                for i in range(1, 9):
                    for j in range(1, 24):
                        if j in [2, 23, 22]:
                            continue
                        data += str(line[1][(i, j)]) + "\t"
                    data += "\n"
            f.write(data)
            f.close()
            year += 1
            if year == 100:
                year = 0
            if year == 17:
                year = 86
                break
示例#2
0
def xls2csv(data_str):
  """
  Modification of xls2csv.py from pyExcelerator. BSD license.
  Copyright (C) 2005 Kiseliov Roman
  """
  buf_in = cStringIO.StringIO(data_str)
  buf_out = cStringIO.StringIO()
  for sheet_name, values in xl.parse_xls(buf_in, 'cp1251'): # parse_xls(arg) -- default encoding
    matrix = [[]]
    for row_idx, col_idx in sorted(values.keys()):
      v = values[(row_idx, col_idx)]
      if isinstance(v, unicode):
        v = v.encode('cp866', 'backslashreplace')
      else:
        v = `v`
      v = '"%s"' % v.strip()
      last_row, last_col = len(matrix), len(matrix[-1])
      while last_row <= row_idx:
        matrix.extend([[]])
        last_row = len(matrix)

      while last_col < col_idx:
        matrix[-1].extend([''])
        last_col = len(matrix[-1])

      matrix[-1].extend([v])

    for row in matrix:
      csv_row = ','.join(row)
      buf_out.write(csv_row + '\r\n')

  res = buf_out.getvalue()
  buf_in.close()
  buf_out.close()
  return res
示例#3
0
def process(file):

    first_sheet = False
    f = open(file, 'r')

    for sheet_name, values in xls.parse_xls(f, 'cp1251'):
        if not first_sheet:
            first_sheet = True

        day = parse_day(sheet_name)
        group = None

        for row_idx in count(2):
            if not (row_idx, 0) in values:
                break

            elif group is None or not ( (row_idx, 1) in values ):
                group = get_group(values[(row_idx, 0)])

            else:
                dish = {
                    'index': values[(row_idx, 0)],
                    'name': values[(row_idx, 1)],
                    'weight': values[(row_idx, 2)] if (row_idx, 2) in values else None,
                    'price': float(values[(row_idx, 3)])
                }

                yield day, group, dish
示例#4
0
def keepgoodrows(xlsfile, pickupfile):
    """Keep rows of an excel sheet if the spot id is contained in the pickup list"""

    outputxlsfile = xlsfile.replace(".xls", "_trimmed.xls")
    pickup = genfromtxt(pickupfile).astype("int64")
    wkbook = pyExcelerator.Workbook()
    wksheet = wkbook.add_sheet("Sheet1")
    keep = array([0])
    for sheet_name, values in pyExcelerator.parse_xls(xlsfile, "cp1251"):  # parse_xls(arg) -- default encoding
        print 'Sheet = "%s"' % sheet_name.encode("cp866", "backslashreplace")
        print "----------------"
        for row_idx, col_idx in sorted(values.keys()):
            if col_idx == 0 and row_idx != 0:
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                    #                    print '(%d, %d) =' % (row_idx, col_idx), v
                if v in pickup:
                    keep = r_[keep, row_idx]
                    print v

        for row_idx, col_idx in sorted(values.keys()):
            if row_idx in keep:
                keep_out, = (keep == row_idx).nonzero()
                #                print "keep_out=",keep_out
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                wksheet.write(keep_out.item(), col_idx, v)

        print "----------------"

    wkbook.save(outputxlsfile)
    def parse_xls(self, xls_file):
        for sheet_name, values in parse_xls(xls_file, 'utf8'): #'cp1251'):
            if sheet_name == "Products":
                products = [[]]
                property_keys = []
                logging.debug('Parsing Sheet = "%s"' % sheet_name.encode('cp866', 'backslashreplace'))
                for row_idx, col_idx in sorted(values.keys()):
                    if row_idx == 0: 
                        property_keys.append(values[(row_idx, col_idx)])
                        continue
                    v = values[(row_idx, col_idx)]
                    if isinstance(v, unicode): v = v.encode('utf8') #'cp866', 'backslashreplace')
                    else: v = str(v)
                    
                    last_row, last_col = len(products), len(products[-1])
                    while last_row < row_idx:
                        products.extend([[]])
                        last_row = len(products)
                
                    while last_col < col_idx:
                        products[-1].extend([''])
                        last_col = len(products[-1])
                
                    products[-1].extend([v])
                
                return (property_keys, products)

                    
示例#6
0
def xls2text_diff():
    """
    diff模型从excel读取队伍数据
    """
    if os.listdir("data_processed_diff/"):
        return
    year = 86
    part = ["W", "E"]
    for p in part:
        while True:
            if year < 10:
                sheets = pyExcelerator.parse_xls(
                    "data/0" + str(year) + p + ".xls", "utf-8")
                f = open("data_processed_diff/0" + str(year) + p, "a+")
            else:
                sheets = pyExcelerator.parse_xls(
                    "data/" + str(year) + p + ".xls", "utf-8")
                f = open("data_processed_diff/" + str(year) + p, "a+")
            data = ""
            for line in sheets:
                for i in range(1, 9):
                    for j in range(1, 24):
                        if j in [2, 23, 4, 5, 7, 8, 10, 11, 13, 14, 19]:
                            continue
                        # if j == 20:
                        # data += str(float(line[1][(i, j)]) - float(line[1][(i, j + 1)])) + "\t"
                        # continue
                        if j == 21:
                            data += str(-float(line[1][(i, j)])) + "\t"
                            continue
                        if j == 18:
                            data += str(-float(line[1][(i, j)])) + "\t"
                            continue
                        data += str(line[1][(i, j)]) + "\t"
                    data += "\n"
            f.write(data)
            f.close()
            year += 1
            if year == 100:
                year = 0
            if year == 17:
                year = 86
                break
示例#7
0
def GetExcelData(sXlsFilePath,
                 sSheetName=None,
                 iKeyRow=0,
                 iResultType=RESULT_TYPE_DICT,
                 iKeyCol=0):
    '''
	获取xls数据对象
	:param sXlsFilePath: xls文件路径
	:param sSheetName: Sheet分页名
	:param iKeyRow: 做key的行数
	:param iResultType: 返回值类型
	:param iKeyCol: 做key的列数(字典型才需要)
	:return: xls数据对象(字典或列表)
	'''
    if iResultType == RESULT_TYPE_DICT:
        raise "暂不支持"

    # from xls获取数据
    import pyExcelerator
    dSheetDataList = pyExcelerator.parse_xls(sXlsFilePath, "cp936")
    if sSheetName is not None:
        raise "暂不支持sheet"
    else:
        dSheetData = dSheetDataList[0][1]  # 第一个sheet

    # key-value配对及规整
    dColKey = {}  # iCol: (sKey, sType)
    for iCol in xrange(0, MAX_COL):
        tPos = iKeyRow, iCol
        if tPos in dSheetData:
            data = dSheetData.pop(tPos)  # pop colkey
            sKey = FormatData(tPos, data, "str")  # 行头分析
            dColKey[iCol] = AnlysColKey(sKey)
    dRowData = {}  # iRow: {sColKey: data}
    for tPos, data in dSheetData.iteritems():
        iRow, iColKey = tPos
        if iColKey not in dColKey:
            continue
        sColKey, sType = dColKey[iColKey]
        if iRow not in dRowData:
            dRowData[iRow] = {}
        data = FormatData(tPos, data, sType)  # 数据获取
        dRowData[iRow][sColKey] = data

    iRowList = dRowData.keys()
    iRowList.sort()
    dDataList = []
    for iRow in iRowList:
        dData = dRowData[iRow]
        for sColKey, sType in dColKey.itervalues():
            if sColKey not in dData:
                data = FormatData(None, None, sType)  # 缺省填充
                dData[sColKey] = data
        dDataList.append(dData)
    return dDataList
示例#8
0
文件: extras.py 项目: electionr/pycrp
        def parseExcelIDs(f):
            def sheetToRows(values):
                matrix = [[]]
                for row_idx, col_idx in sorted(values.keys()):
                    v = values[(row_idx, col_idx)]
                    if isinstance(v, unicode):
                        v = v.encode('cp866', 'backslashreplace')
                    else:
                        v = str(v)
                    last_row, last_col = len(matrix), len(matrix[-1])
                    while last_row < row_idx:
                        matrix.extend([[]])
                        last_row = len(matrix)

                    while last_col < col_idx:
                        matrix[-1].extend([''])
                        last_col = len(matrix[-1])

                    matrix[-1].extend([v])
                return matrix

            grabsheets = [('Members', 'members', [0,2,3,4]), ('CRP Industry Codes', 'categories', [0,1,2,3,4,5]), 
                ('Congressional Cmte Codes', 'congcmtes',[0,1]), ('Congressional Cmte Assignments', 'congcmte_posts', [0,2,3,4])] 

            #members: 0,2,4,3 for 2012

            for sheet_name, values in pyExcelerator.parse_xls(f): 
                matrix = [[]]
                sheet_title = sheet_name.encode('cp866', 'backslashreplace')
                for sheet_info in grabsheets:
                    if sheet_title.startswith(sheet_info[0]):
                        matrix = sheetToRows(values)
                        newmatrix = []
                        prefix = None #special case-make this the first value for all records in worksheet
                        if sheet_title.startswith('Members'):
                            prefix = sheet_title[-5:-2]
                        for row in matrix:
                            if len(row)>0 and not row[1].startswith("This information is being made available"):
                                newrow = []
                                if prefix:
                                    newrow.append(prefix)
                                for i in sheet_info[2]:
                                    if sheet_info[1]=='congcmte_posts' and i==4 and len(row)<5:
                                        thisval = ''
                                    else:
                                        thisval = row[i]
                                    try:                        
                                        newrow.append( thisval )
                                    except:
                                        logging.info( str(row) + " failed" )
                                newmatrix.append(newrow)
                        #get rid of headers
                        if sheet_info[1] in ['members', 'categories', 'congcmtes', 'congcmte_posts']:
                            newmatrix = newmatrix[1:]
                        writerows(newmatrix,sheet_info[1])
示例#9
0
 def importAllXlsData(self, request, f_name, codepage):
     """set data from XLS file on new or modified folder objects"""
     sitemanger = zapi.getParent(self)
     locSitemanager = zapi.getParent(sitemanger)
     root_folder = zapi.getParent(locSitemanager)
     parseRet = xl.parse_xls(f_name, codepage)
     for sheet_name, values in parseRet:
         # dbg # print "sheet_name: ", sheet_name
         if sheet_name in root_folder:
             folder = root_folder[sheet_name]
             self._xlsSheet2folder_(request, values, folder)
示例#10
0
文件: xls.py 项目: gic888/mienblocks
def read(f, **kwargs):
	objs=pe.parse_xls(f.name)
	data=[]
	for o in objs:
		dat=xl2data(o)
		if dat:
			data.append(dat)
	node={'tag':"Nmpml", 'attributes':{'Name':'0'}, 'elements':[], 'cdata':''}
	document = mdat.basic_tools.NmpmlObject(node)
	for d in data:
		document.newElement(d)
	return document
示例#11
0
 def read(self):
     book = pyExcelerator.parse_xls(self.filename)
     # a dictionary of value at each coordinate
     parsed_dictionary = book[0][1]
     # find maximum ID's of coordinates
     rows, cols = zip(*parsed_dictionary.keys())
     num_rows = max(rows) + 1
     num_cols = max(cols) + 1
     for row in range(num_rows):
         record = [
             parsed_dictionary.get((row, col)) for col in range(num_cols)
         ]
         yield record
     raise StopIteration()
示例#12
0
def import_dis(filename,exp_colpos,dis_colpos):
    print 'opening y'
    sheet_cont = parse_xls(filename)[0][1];
    max_rowpos = 2
    for (rowpos,colpos) in sheet_cont:
        if max_rowpos < rowpos: max_rowpos = rowpos
    rowpos = 2
    while rowpos <= max_rowpos: 
        exp_pos = (rowpos,exp_colpos)
        dis_pos = (rowpos,dis_colpos)
        #if exp_pos in sheet_cont and dis_pos not in sheet_cont:
        #    print sheet_cont[exp_pos]
        if exp_pos in sheet_cont and dis_pos in sheet_cont:
            exp_list.append(sheet_cont[exp_pos])
            dis_list.append(sheet_cont[dis_pos])
            #print sheet_cont[exp_pos], sheet_cont[dis_pos]
        rowpos += 1
示例#13
0
 def handle(self, *args, **options):
     book = pyExcelerator.parse_xls("excel.xls")
     patient_excel = book[0][1]
     diagnosis_excel = book[1][1]
     mkb = get_mkb(diagnosis_excel)
     for num_row in range(2, 2238):
         d_info = get_diagnosis_info(patient_excel, num_row, mkb)
         p_info = get_patient_info(patient_excel, num_row, d_info)
         if not p_info:
             continue
         patient = Patient(**p_info)
         patient.save()
         if not d_info:
             print patient_excel.get((num_row, 2))
             raise Exception(u'Нет информации по пациенту')
         d_info['patient'] = patient
         diagnosis = Diagnosis(**d_info)
         diagnosis.save()
示例#14
0
def readExcel(filePath):   #read excel --> list [(row_idx, col_idx), value]
    data=[]
    try:    
        #  
        for sheet_name, values in pyEx.parse_xls(filePath, 'cp1251'): # parse_xls(arg) -- default encoding
            print 'Sheet = "%s"' % sheet_name.encode('cp866', 'backslashreplace')
            print '----------------'
            for row_idx, col_idx in sorted(values.keys()):
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode('cp866', 'backslashreplace')
                print '(%d, %d) =' % (row_idx, col_idx), v
                data.append([(row_idx, col_idx), v])
            print '----------------'
            #print 'file read successful'
            return data
    except:
        print 'file read Error'
        print "Unexpected error:", sys.exc_info()
        return -1
示例#15
0
def pyxltr_xls2array(infilename):
    """
    get an array (one per excel sheet) of 2-d arrays holding the cells in that sheet
    """
    import pyExcelerator
    # open file
    try:     raw_sheets = pyExcelerator.parse_xls(infilename, 'cp1251')
    except:  print >>sys.stderr, "Can't read excel file %s" % (infilename) ; return []
    # pull out cell data
    excel_sheets = []
    for sheet_name, values in raw_sheets:
        if not values.keys(): continue
        # Column and row names
        (nrows, ncols) = map(max, zip(*values.keys()))
        # turn that sheet into list of lists
        excel_sheets.append({
            'sheet_name':       sheet_name,
            'sheet_data':       [ ([values.get((row, col), None) for col in range(0,ncols+1)])
                                 for row in range(0,nrows+1) ]
            })
    return excel_sheets
示例#16
0
 def extract_xls(self, locale, filename):
     path_xls = 'locale-xls-in/%s.xls' % locale
     if not os.path.exists(path_xls):
         return {}
     ifh = open(path_xls)
     content = pyExcelerator.parse_xls(ifh)
     ret = {}
     rownum = 0
     sheetnum = self.files.index(filename)
     while True:
         #print 'ok1'
         if (content[sheetnum][1].has_key((rownum, 0))):
             #print ' ok2'
             if (content[sheetnum][1].has_key((rownum, 1))):
                 #print '  ok3'
                 ret[content[sheetnum][1][(rownum, 0)]] = content[sheetnum][1][(rownum, 1)]
         else:
             break
         rownum += 1
     ifh.close()
     return ret
示例#17
0
def XiaoquToMetroSaveToDB() :
    client = MongoClient('localhost', 27017)
    db = client['lianjia']

    #    sDate = settings['MONGODB_DOCNAME'] + time.strftime('%Y%m%d', time.localtime(time.time()))
    #    coll = db['house_20170712']
    sDate = 'xiaoqu' + time.strftime('%Y%m%d', time.localtime(time.time()))
    sDate = 'xiaoqu_20170713'
    coll = db[sDate]

    p1 = Point()
    p1.lat = 37.480563
    p1.lng = 121.467113
    p2 = Point()
    p2.lat = 37.480591
    p2.lng = 121.467926

    print getDistance(p1, p2)

    sheets = pyExcelerator.parse_xls(u'E:/地铁坐标.xls')
    print sheets
#    for i in len(sheets[0][1]) / 2
    for i in range(0, len(sheets[0][1]) / 2) :
#        print  " {} : {} : {} ".format(i, sheets[0][1][(i,0)],sheets[0][1][(i,1)])
        pass

    for obj in coll.find():
        p1.lat = float(obj['gps_y'])
        p1.lng = float(obj['gps_x'])
        min_dist = 200000.0
        for i in range(0, len(sheets[0][1]) / 2):
            p2.lat = sheets[0][1][(i,1)]
            p2.lng = sheets[0][1][(i,0)]
            tmp_dist = getDistance(p1, p2)
            print " {} : {} : {} ".format(p1, p2, tmp_dist)
            if ( tmp_dist < min_dist ) :
                min_dist = tmp_dist
        obj['m_dist'] = min_dist
        print obj['m_dist']
        coll.update({'_id': obj['_id']}, {'$set': {'m_dist': obj['m_dist']}})
示例#18
0
文件: excel.py 项目: twz915/pyeasy
	def __init__(self,path,mode="r"):
		self.path = path
		self.mode = mode
		if mode=="r":
			if os.path.exists(path):
				try:
					self.data = parse_xls(path)
					self.sheets = self.data[0]
				except IndexError:
					self.sheets = ""
					print "Can't find sheet %d"%sheet
				except:
					self.sheets = ""
					print "Unknown Error!"
			else:
				print "Can't find %s" %path
		elif mode=="w":
			self.newexcel = Workbook()
			self.ws = self.newexcel.add_sheet('sheet1')

		elif mode=="a":#add to excel
			pass
示例#19
0
def keeptmprows(xlsfile, folderpath="tmp"):
    """Eliminate unnecessary lines from the excel file if the corresponding
    minimovies had not been created in the "tmp" folder"""
    outputxlsfile = xlsfile.replace(".xls", "_trimmed.xls")
    sids = array([])
    for file in os.listdir(folderpath):
        hi = file.rfind("_sp")
        if hi != -1:
            index = int(file[hi + 3 :])
            sids = r_[sids, index]
    wkbook = pyExcelerator.Workbook()
    wksheet = wkbook.add_sheet("Sheet1")
    keep = array([0])
    for sheet_name, values in pyExcelerator.parse_xls(xlsfile, "cp1251"):  # parse_xls(arg) -- default encoding
        print 'Sheet = "%s"' % sheet_name.encode("cp866", "backslashreplace")
        print "----------------"
        for row_idx, col_idx in sorted(values.keys()):
            if col_idx == 0 and row_idx != 0:
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                    #                    print '(%d, %d) =' % (row_idx, col_idx), v
                if v in sids:
                    keep = r_[keep, row_idx]
                    print v

        for row_idx, col_idx in sorted(values.keys()):
            if row_idx in keep:
                keep_out, = (keep == row_idx).nonzero()
                #                print "keep_out=",keep_out
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                wksheet.write(keep_out.item(), col_idx, v)

        print "----------------"

    wkbook.save(outputxlsfile)
示例#20
0
def xls2csv(data_str):
    """
  Modification of xls2csv.py from pyExcelerator. BSD license.
  Copyright (C) 2005 Kiseliov Roman
  """
    buf_in = cStringIO.StringIO(data_str)
    buf_out = cStringIO.StringIO()
    for sheet_name, values in xl.parse_xls(
            buf_in, 'cp1251'):  # parse_xls(arg) -- default encoding
        matrix = [[]]
        for row_idx, col_idx in sorted(values.keys()):
            v = values[(row_idx, col_idx)]
            if isinstance(v, unicode):
                v = v.encode('cp866', 'backslashreplace')
            else:
                v = ` v `
            v = '"%s"' % v.strip()
            last_row, last_col = len(matrix), len(matrix[-1])
            while last_row <= row_idx:
                matrix.extend([[]])
                last_row = len(matrix)

            while last_col < col_idx:
                matrix[-1].extend([''])
                last_col = len(matrix[-1])

            matrix[-1].extend([v])

        for row in matrix:
            csv_row = ','.join(row)
            buf_out.write(csv_row + '\r\n')

    res = buf_out.getvalue()
    buf_in.close()
    buf_out.close()
    return res
示例#21
0
# -*- coding:utf-8 -*-

import pyExcelerator
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

# 获取所有sheets
sheets = pyExcelerator.parse_xls('LocalizableBack.xls')
# 获取第一个sheet的所有数据
sheet = sheets[0]
tuple = sheet[1]

length = len(tuple) / 2

# 记录第0列数据
list0 = []
for x in range(length):
    if tuple[x, 0]:
        string = tuple[x, 0]
        list0.append(string)

print("第0列数据:\n")
print(list0)

# 记录第1列数据
list1 = []
for x in range(length):
    if tuple[x, 1]:
        string = tuple[x, 1]
示例#22
0
文件: XLS.py 项目: kierzek/MUFINS
def parse_file(fname):
    rv = []
    for name, sheet in pxls.parse_xls(fname, encoding=None):
        rv.append((name, parse_sheet(sheet)))
    return rv
示例#23
0
#!/usr/bin/python
#sys.path.insert(0,'/home/tools/python/v2.4.5_centos5_vw8/lib/python2.4/site-packages')
import pyExcelerator

book=pyExcelerator.parse_xls('Team_2834_2016_Scouting_Database_v1_w4.xls')

#from team number to map of name to list of results
team_info={}

def add_info(team,name,point):
	if not team_info.has_key(team): team_info[team]={}
	info=team_info[team]
	if not info.has_key(name): info[name]=[]
	info[name].append(point)

for sheet in book:
	name=sheet[0]
	values=sheet[1]
	#print name,len(values)
	first=values.get((0,0))
	if first!=name or name=='Worldrank': continue
	#print name

	i=3
	while i<100:
		team_cell=values.get((i,1))
		if team_cell is None: break
		team=int(team_cell)
		#print 'team',team
		
		col=0
# -*- coding:utf-8 -*-

import pyExcelerator
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

# 获取所有sheets
sheets = pyExcelerator.parse_xls('LocalizableBack.xls')
# 获取第一个sheet的所有数据
sheet = sheets[0]
tuple = sheet[1]

length = len(tuple) / 2

# 记录第0列数据
list0 = []
for x in range(length):
    if tuple[x, 0]:
        string = tuple[x, 0]
        list0.append(string)

print("第0列数据:\n")
print(list0)

# 记录第1列数据
list1 = []
for x in range(length):
    if tuple[x, 1]:
        string = tuple[x, 1]
示例#25
0
def keepokrows(xlsfile):
    """Read an excel file and extract well-marked tracks to meet appropriate conditions"""

    outputxlsfile = xlsfile.replace(".xls", "_ok.xls")
    outputidfile = xlsfile.replace(".xls", "_okid.txt")
    wkbook = pyExcelerator.Workbook()
    wksheet = wkbook.add_sheet("Sheet1")
    keep = array([0])
    for sheet_name, values in pyExcelerator.parse_xls(xlsfile, "cp1251"):  # parse_xls(arg) -- default encoding
        print 'Sheet = "%s"' % sheet_name.encode("cp866", "backslashreplace")
        print "----------------"
        for row_idx, col_idx in sorted(values.keys()):
            if row_idx != 0 and col_idx == 12:
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                    #                    print '(%d, %d) =' % (row_idx, col_idx), v
                if v == "good" or v == "ok" or v == "lifetime":
                    keep = r_[keep, row_idx]
                    #                    print v
                    #        print keep,keep.size
        #        keep = unique(keep)
        trash = array([])
        for row_idx, col_idx in sorted(values.keys()):
            if row_idx in keep:
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                if (
                    v == "unstable"
                    or v == "merging"
                    or v == "splitting"
                    or v == "bad tracking"
                    or v == "doublet"
                    or v == "masked"
                    or v == "not clear"
                ):
                    tobedeleted, = (keep == row_idx).nonzero()
                    trash = r_[trash, tobedeleted]
        trash = unique(trash)
        #        print trash,trash.size
        keep = delete(keep, trash, None)
        print keep, keep.size

        cids = array([])
        for row_idx, col_idx in sorted(values.keys()):
            if row_idx in keep:
                keep_out, = (keep == row_idx).nonzero()
                #                print "keep_out=",keep_out
                v = values[(row_idx, col_idx)]
                if isinstance(v, unicode):
                    v = v.encode("cp866", "backslashreplace")
                wksheet.write(keep_out.item(), col_idx, v)
                if row_idx != 0 and col_idx == 0:
                    cids = r_[cids, v]

        print "----------------"

    wkbook.save(outputxlsfile)
    cids = unique(cids)
    savetxt(outputidfile, cids[:, newaxis], fmt="%d")
    def handle(self, *args, **options):
        from inventory.models import Coin
        marketplace = MarketPlace.objects.get(slug="greatcoins")
        today = datetime.datetime.today()
        spreadsheet = open(args[0], 'r')

        for sheet_name, values in parse_xls(spreadsheet, 'utf8'): #'cp1251'):
            if sheet_name == "Sheet1" or sheet_name == "Sheet2":
                products = [[]]
                property_keys = []
                print ('Parsing Sheet = "%s"' % sheet_name.encode('cp866', 'backslashreplace'))
                for row_idx, col_idx in sorted(values.keys()):
                    if row_idx == 0: 
                        property_keys.append(values[(row_idx, col_idx)])
                        continue
                    v = values[(row_idx, col_idx)]
                    if isinstance(v, unicode): v = v.encode('utf8') #'cp866', 'backslashreplace')
                    else: v = str(v)
                    
                    last_row, last_col = len(products), len(products[-1])
                    while last_row < row_idx:
                        products.extend([[]])
                        last_row = len(products)
                
                    while last_col < col_idx:
                        products[-1].extend([''])
                        last_col = len(products[-1])
                    products[-1].extend([v])
                print (property_keys)
                
                
                sort_order_idx = 14
                sub_category_idx = 2
                category_idx = 1
                subcategories = set()
                categories = set()
                category_tree = {}
                
                category_objs = {}
                subcategory_objs = {}
                
                counter = 0
                for i, product in enumerate(products[1:]):
                    line = i + 3
#                    counter += 1
#                    if counter == 600: break
                    if len(product) < 6:
                        print "Line %d: invalid row" % (line)
                        continue
                    
                    try:
                        pcgs_number = int(decimal.Decimal(product[5]))
                    except ValueError:
                        print "Line %d : invalid PCGS" % (line)
                        continue
                    except Exception,e:
                        print "Line %d, %s. Could not recognize PCGS Number, line will not be saved.\n >> %s" % ((line), e, product)
                        continue
                    
                    if len(product) < sub_category_idx + 1:
                        print "Line %d: sub category is missing" % (line)
                        continue
                    
                    if len(product) < 15:
                        print "Line %d: sort order value don't exist. line will not be saved.\n >> %s" % (line, product)
                        continue
                        
                    category = product[category_idx]
                    subcategory = clean_subcategory(product[sub_category_idx])
                    if category == '':
                        print "Line %d: category is missing" % (line)
                        print product
                        continue

                    if category == "Hawaii": 
                        print "Line %d: Hawaii category found, break." % (line)
                        break
                    
                    category_obj = category_objs.get(category, None)
                    if category_obj is None:
                        try: 
                            category_obj = MarketCategory.objects.get(slug=slugify(category))
                        except MarketCategory.DoesNotExist:
                            category_obj = MarketCategory.objects.get_or_create(marketplace=marketplace, name=category)[0]
                        category_objs[category] = category_obj
                    
                    if subcategory == '':
                        subcategory_obj = None
                    else:
                        subcategory_obj = subcategory_objs.get(category + '_' + subcategory, None)  
                        if subcategory_obj is None:
                            try:
                                subcategory_obj = MarketSubCategory.objects.get(parent=category_obj, slug=slugify(subcategory))
                            except MarketSubCategory.DoesNotExist:
                                subcategory_obj = MarketSubCategory.objects.get_or_create(marketplace=marketplace,
                                                                                          parent=category_obj, 
                                                                                          name=subcategory)[0]
                            subcategory_objs[category + '_' + subcategory] = subcategory_obj
                            category_tree.setdefault(category, set())
                            category_tree[category].add(subcategory)
                    
                    coin, created = Coin.objects.get_or_create(pcgs_number=pcgs_number)
                    
                    if not created: #and today < coin.last_update:
                        #already updated today
                        #print "Line %d: coin already saved. %s" % (line, coin)
                        continue
                    
                    coin.category = category_obj
                    coin.subcategory = subcategory_obj
                    coin.country_code = 'us'
                    coin.pcgs_number = pcgs_number
                    coin.description = product[6]
                    coin.year_issued = product[7]
                    coin.actual_year = product[8]
                    coin.denomination = product[9]
                    coin.major_variety = product[10]
                    coin.die_variety = product[11]
                    coin.prefix = product[12]
                    coin.suffix = product[13]
                    if len(product) > 14:
                        coin.sort_order = product[14]
                    coin.heading = subcategory
                    if len(product) > 16:
                        coin.holder_variety = product[16]
                    if len(product) > 17:
                        coin.holder_variety_2 = product[17]
                    if len(product) > 18:
                        coin.additional_data = product[18]
                    coin.save()
    def handle(self, *args, **options):
        from inventory.models import Coin
        marketplace = MarketPlace.objects.get(slug="greatcoins")
        today = datetime.datetime.today()
        spreadsheet = open(args[0], 'r')

        for sheet_name, values in parse_xls(spreadsheet, 'utf8'):  #'cp1251'):
            if sheet_name == "Sheet1" or sheet_name == "Sheet2":
                products = [[]]
                property_keys = []
                print('Parsing Sheet = "%s"' %
                      sheet_name.encode('cp866', 'backslashreplace'))
                for row_idx, col_idx in sorted(values.keys()):
                    if row_idx == 0:
                        property_keys.append(values[(row_idx, col_idx)])
                        continue
                    v = values[(row_idx, col_idx)]
                    if isinstance(v, unicode):
                        v = v.encode('utf8')  #'cp866', 'backslashreplace')
                    else:
                        v = str(v)

                    last_row, last_col = len(products), len(products[-1])
                    while last_row < row_idx:
                        products.extend([[]])
                        last_row = len(products)

                    while last_col < col_idx:
                        products[-1].extend([''])
                        last_col = len(products[-1])
                    products[-1].extend([v])
                print(property_keys)

                sort_order_idx = 14
                sub_category_idx = 2
                category_idx = 1
                subcategories = set()
                categories = set()
                category_tree = {}

                category_objs = {}
                subcategory_objs = {}

                counter = 0
                for i, product in enumerate(products[1:]):
                    line = i + 3
                    #                    counter += 1
                    #                    if counter == 600: break
                    if len(product) < 6:
                        print "Line %d: invalid row" % (line)
                        continue

                    try:
                        pcgs_number = int(decimal.Decimal(product[5]))
                    except ValueError:
                        print "Line %d : invalid PCGS" % (line)
                        continue
                    except Exception, e:
                        print "Line %d, %s. Could not recognize PCGS Number, line will not be saved.\n >> %s" % (
                            (line), e, product)
                        continue

                    if len(product) < sub_category_idx + 1:
                        print "Line %d: sub category is missing" % (line)
                        continue

                    if len(product) < 15:
                        print "Line %d: sort order value don't exist. line will not be saved.\n >> %s" % (
                            line, product)
                        continue

                    category = product[category_idx]
                    subcategory = clean_subcategory(product[sub_category_idx])
                    if category == '':
                        print "Line %d: category is missing" % (line)
                        print product
                        continue

                    if category == "Hawaii":
                        print "Line %d: Hawaii category found, break." % (line)
                        break

                    category_obj = category_objs.get(category, None)
                    if category_obj is None:
                        try:
                            category_obj = MarketCategory.objects.get(
                                slug=slugify(category))
                        except MarketCategory.DoesNotExist:
                            category_obj = MarketCategory.objects.get_or_create(
                                marketplace=marketplace, name=category)[0]
                        category_objs[category] = category_obj

                    if subcategory == '':
                        subcategory_obj = None
                    else:
                        subcategory_obj = subcategory_objs.get(
                            category + '_' + subcategory, None)
                        if subcategory_obj is None:
                            try:
                                subcategory_obj = MarketSubCategory.objects.get(
                                    parent=category_obj,
                                    slug=slugify(subcategory))
                            except MarketSubCategory.DoesNotExist:
                                subcategory_obj = MarketSubCategory.objects.get_or_create(
                                    marketplace=marketplace,
                                    parent=category_obj,
                                    name=subcategory)[0]
                            subcategory_objs[category + '_' +
                                             subcategory] = subcategory_obj
                            category_tree.setdefault(category, set())
                            category_tree[category].add(subcategory)

                    coin, created = Coin.objects.get_or_create(
                        pcgs_number=pcgs_number)

                    if not created:  #and today < coin.last_update:
                        #already updated today
                        #print "Line %d: coin already saved. %s" % (line, coin)
                        continue

                    coin.category = category_obj
                    coin.subcategory = subcategory_obj
                    coin.country_code = 'us'
                    coin.pcgs_number = pcgs_number
                    coin.description = product[6]
                    coin.year_issued = product[7]
                    coin.actual_year = product[8]
                    coin.denomination = product[9]
                    coin.major_variety = product[10]
                    coin.die_variety = product[11]
                    coin.prefix = product[12]
                    coin.suffix = product[13]
                    if len(product) > 14:
                        coin.sort_order = product[14]
                    coin.heading = subcategory
                    if len(product) > 16:
                        coin.holder_variety = product[16]
                    if len(product) > 17:
                        coin.holder_variety_2 = product[17]
                    if len(product) > 18:
                        coin.additional_data = product[18]
                    coin.save()
示例#28
0
        dis_pos = (rowpos,dis_colpos)
        #if exp_pos in sheet_cont and dis_pos not in sheet_cont:
        #    print sheet_cont[exp_pos]
        if exp_pos in sheet_cont and dis_pos in sheet_cont:
            exp_list.append(sheet_cont[exp_pos])
            dis_list.append(sheet_cont[dis_pos])
            #print sheet_cont[exp_pos], sheet_cont[dis_pos]
        rowpos += 1
    #print len(exp_list), exp_list

import_dis(filename='linre_y.xls',exp_colpos=0,dis_colpos=13)
import_dis(filename='Link_HD_HDF_2008_27102010.xls',exp_colpos=8,dis_colpos=19)

num_finder = re.compile('^\d+')
data4hz = dict()
for sheet_name, values in parse_xls('linre_x_big.xls'):
    if sheet_name in exp_list:
        print sheet_name,' found'
        #if sheet_name=='F9' : print  values
        exp_index = exp_list.index(sheet_name)
        for rowpos, colpos in values:
            if rowpos>0 and colpos>0:
                ex = int(num_finder.match(values[(0,colpos)]).group())
                em = values[(rowpos,0)]
                hzkey = (ex,em)
                if hzkey not in data4hz: data4hz[hzkey] = [None] * len(exp_list)
                data4hz[hzkey][exp_index] = values[(rowpos,colpos)]
        print sheet_name,' parsed'

apicler = Pickler(open('linre_big.st','w'))
apicler.dump(dis_list)
示例#29
0
#!/usr/bin/env python

from urllib import urlretrieve
import os
import xlrd
import pyExcelerator as pyxl

src_url = (
    "http://www.prixagriculture.org/prix-des-produits?AAPrice"
    "[REGION_ID]=5&AAPrice[PROVINCE_ID]=&AAPrice[TYPE_MARKET_ID]=2&AAPrice"
    "[productFamilyId]=&exportXLS=true")
tmpfile = '/tmp/prices.xls'
try:
    #urlretrieve(src_url, tmpfile)
    book = pyxl.parse_xls(tmpfile)
    data = book[0][1]
    len_data = len(data) / 5
    print('{:*^40}'.format('Products'))
    for i in range(1, len_data):
        result = '{0:<30} {1:<5} {2:<5}'.format(data[i, 0], data[i, 2],
                                                data[i, 1])
        print(result)
finally:
    pass
#coding=utf-8
#######################################################
#filename:test_pyExcelerator_read.py
#author:defias
#date:2013-6-16
#function:读excel文件中的数据
#######################################################
import pyExcelerator

#parse_xls返回一个列表,每项都是一个sheet页的数据。
#每项是一个二元组(表名,单元格数据)。其中单元格数据为一个字典,键值就是单元格的索引(i,j)。如果某个单元格无数据,那么就不存在这个值
sheets = pyExcelerator.parse_xls('E:\\Code\\Python\\testdata.xls')
print sheets
示例#31
0
文件: xls2st.py 项目: vrepina/linre
from pyExcelerator import parse_xls
from pickle import Pickler
import re

sheet_cont = parse_xls('linre_y.xls')[0][1];
exp_colpos = 0
dis_colpos = 13
(exp_list,dis_list) = zip(*[
    ( sheet_cont[(i,exp_colpos)], sheet_cont[(i,dis_colpos)] )
    for i in range(2,11) 
    if (i,dis_colpos) in sheet_cont
])

num_finder = re.compile('^\d+')
data4hz = dict()
for sheet_name, values in parse_xls('linre_x.xls'):
    if sheet_name in exp_list:
        exp_index = exp_list.index(sheet_name)
        for rowpos, colpos in values:
            if rowpos>0 and colpos>0:
                ex = int(num_finder.match(values[(0,colpos)]).group())
                em = values[(rowpos,0)]
                hzkey = (ex,em)
                if hzkey not in data4hz: data4hz[hzkey] = [None] * len(exp_list)
                data4hz[hzkey][exp_index] = values[(rowpos,colpos)]

apicler = Pickler(open('linre.st','w'))
apicler.dump(dis_list)
apicler.dump(data4hz)
示例#32
0
def parse2():
    filefullname = r'D:\GFT\Test\test_excel.xls'
    sheet_values = parse_xls(filefullname)
    print sheet_values
示例#33
0
rb = xlrd.open_workbook(f)
wb = xlutils.copy.copy(rb)
ws = wb.get_sheet(0) #通过sheet_by_index()获取的sheet对象没有write()方法
ws.write(1, 1, 'changed!')
wb.add_sheet('sheetnnn2',cell_overwrite_ok=True)
wb.save(f)


from pyExcelerator import *
w=Workbook()  #创建工作簿
ws=w.add_sheet('Sheet1')  #添加工作表
ws.write(0, 0, 'A1')  #写入单元格
w.save('book.xls')

import pyExcelerator
sheets = pyExcelerator.parse_xls('book.xls')
print sheets


from openpyxl import Workbook
wb = Workbook()
ws = wb.active    ##获取active的sheet
ws['A1'] = 42   ##直接对指定cell进行数据赋值
ws.append([1, 2, 3])  ##追加一行数据
import datetime
ws['A2'] = datetime.datetime.now()   ##支持python格式自动转换
wb.save("sample.xlsx")

from openpyxl import load_workbook
wb = load_workbook(filename=r'e:/myexcel.xlsx')
sheetnames = wb.get_sheet_names()
示例#34
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/1/9 17:59
# @Author  : lingxiangxiang
# @File    : demonpyexcele.py

import pyExcelerator
#创建workbook和sheet对象
wb = pyExcelerator.Workbook()
ws = wb.add_sheet(u'第一页')
#设置样式
myfont = pyExcelerator.Font()
myfont.name = u'Times New Roman'
myfont.bold = True
mystyle = pyExcelerator.XFStyle()
mystyle.font = myfont
#写入数据,使用样式
ws.write(0, 0, u'hello lingxiangxinag!', mystyle)
#保存该excel文件,有同名文件时直接覆盖
wb.save('mini.xls')
print('创建excel文件完成!')


import pyExcelerator
#parse_xls返回一个列表,每项都是一个sheet页的数据。
#每项是一个二元组(表名,单元格数据)。其中单元格数据为一个字典,键值就是单元格的索引(i,j)。如果某个单元格无数据,那么就不存在这个值
sheets = pyExcelerator.parse_xls('mini.xls')
print(sheets)
示例#35
0
def ParseXlsData(filename):
    return xlst.parse_xls(filename)
def parse_cbs_data(args):
    if not os.access(args.file, os.R_OK):
        if args.verbose:
            print("Error: Cannot read CBS data file: " + args.file)
        return

    if args.verbose:
        print("Parsing XLS file: %s" % args.file)

    workbook = pyExcelerator.parse_xls(args.file)

    cbs_data = hash()

    for sheet_name, values in workbook:
        value = values[(0, 0)]

        if not value:
            if args.verbose:
                print("Warning: No value in cell 0,0, skipping worksheet: %s" %
                      sheet_name)

            continue

        header = []
        for i in range(0, 4):
            value = values[(0, i)]

            header.append(value)

        column = {}

        # Gemeentecode	Gemeentenaam	Provincienaam	Provinciecode
        if (header[0] == 'Gemeentecode' and header[1] == 'Gemeentenaam'
                and header[2] == 'Provincienaam'
                and header[3] == 'Provinciecode'):
            column = {
                'gemeentecode': 0,
                'gemeentenaam': 1,
                'provincienaam': 2,
                'provinciecode': 3,
            }

        # Gemeentecode	Provinciecode	Gemeentenaam	Provincienaam
        elif (header[0] == 'Gemeentecode' and header[1] == 'Provinciecode'
              and header[2] == 'Gemeentenaam'
              and header[3] == 'Provincienaam'):
            column = {
                'gemeentecode': 0,
                'provinciecode': 1,
                'gemeentenaam': 2,
                'provincienaam': 3,
            }

        # prov_Gemcode	provcode	Gemcodel	provcodel
        elif (header[0] == 'prov_Gemcode' and header[1] == 'provcode'
              and header[2] == 'Gemcodel' and header[3] == 'provcodel'):
            column = {
                'gemeentecode': 0,
                'provinciecode': 1,
                'gemeentenaam': 2,
                'provincienaam': 3,
            }

        # Gemcode	provcode	Gemcodel	provcodel
        elif (header[0] == 'Gemcode' and header[1] == 'provcode'
              and header[2] == 'Gemcodel' and header[3] == 'provcodel'):
            column = {
                'gemeentecode': 0,
                'provinciecode': 1,
                'gemeentenaam': 2,
                'provincienaam': 3,
            }

        # Gemcode	Gemcodel	provcode	provcodel
        elif (header[0] == 'Gemcode' and header[1] == 'Gemcodel'
              and header[2] == 'provcode' and header[3] == 'provcodel'):
            column = {
                'gemeentecode': 0,
                'gemeentenaam': 1,
                'provinciecode': 2,
                'provincienaam': 3,
            }

        # Unsupported format
        else:
            if args.verbose:
                order = ""

                i = 0
                for col in header:
                    if i > 0:
                        print(" | ")

                    print(col)

                    i += 1

                print("Error: Unsupported header order: %s" % order)

            return

        row_max = len(values) / len(column)

        for row in range(1, row_max):
            record = {}

            columns = [
                'gemeentecode', 'gemeentenaam', 'provinciecode',
                'provincienaam'
            ]
            for key in columns:
                value = values[(row, column[key])]

                record[key] = value

            if (not record.has_key('gemeentecode')
                    and not record.has_key('gemeentenaam')
                    and not record.has_key('provinciecode')
                    and not record.has_key('provincienaam')):
                if args.verbose:
                    print("Empty row, stoppping here.")
                break

            if not record.has_key('gemeentecode'):
                if args.verbose:
                    print("Empty 'gemeentecode' column (%s), stoppping here." %
                          column['gemeentecode'])
                break
            if not record.has_key('gemeentenaam'):
                if args.verbose:
                    print("Empty 'gemeentenaam' column (%s), stoppping here." %
                          column['gemeentenaam'])
                break
            if not record.has_key('provinciecode'):
                if args.verbose:
                    print(
                        "Empty 'provinciecode' column (%s), stoppping here." %
                        column['provinciecode'])
                break
            if not record.has_key('provincienaam'):
                if args.verbose:
                    print(
                        "Empty 'provincienaam' column (%s), stoppping here." %
                        column['provincienaam'])
                break

            columns = ['gemeentecode', 'provinciecode']
            for key in columns:
                record[key] = strip_leading_zeros(record[key])

            cbs_data[record['provinciecode']][record['gemeentecode']] = record

        break

    return cbs_data
示例#37
0

# BEGIN pyEx

#导出
from pyExcelerator import Workbook
import StringIO

wb = Workbook()
sheet = wb.add_sheet(u"XXX")
columns = [u'xxx', u'xxx']
for i, c in enumerate(columns):
    sheet.write(0, i, c)
for i, u in enumerate(XXX, 1):
    sheet.write(i, XXX, XXX)

datafile = StringIO.StringIO()
wb.save(datafile)
datafile.seek(0)
response = HttpResponse(datafile.read(), mimetype='application/vnd.ms-excel')
response['Content-Disposition'] = 'attachment; filename=%s.xls' % urllib.quote_plus((u"%sXXX统计" % XXX).encode('utf8'))
return response

#解析
from pyExcelerator import parse_xls
sheets = parse_xls(upload_file)
sheet_name, sheet_data = sheets[0]
data = sheet_data[(row, col)]

# END pyEx
示例#38
0
文件: extras.py 项目: electionr/pycrp
        def parseExcelIDs(f):
            def sheetToRows(values):
                matrix = [[]]
                for row_idx, col_idx in sorted(values.keys()):
                    v = values[(row_idx, col_idx)]
                    if isinstance(v, unicode):
                        v = v.encode('cp866', 'backslashreplace')
                    else:
                        v = str(v)
                    last_row, last_col = len(matrix), len(matrix[-1])
                    while last_row < row_idx:
                        matrix.extend([[]])
                        last_row = len(matrix)

                    while last_col < col_idx:
                        matrix[-1].extend([''])
                        last_col = len(matrix[-1])

                    matrix[-1].extend([v])
                return matrix

            grabsheets = [('Members', 'members', [0, 2, 3, 4]),
                          ('CRP Industry Codes', 'categories',
                           [0, 1, 2, 3, 4, 5]),
                          ('Congressional Cmte Codes', 'congcmtes', [0, 1]),
                          ('Congressional Cmte Assignments', 'congcmte_posts',
                           [0, 2, 3, 4])]

            #members: 0,2,4,3 for 2012

            for sheet_name, values in pyExcelerator.parse_xls(f):
                matrix = [[]]
                sheet_title = sheet_name.encode('cp866', 'backslashreplace')
                for sheet_info in grabsheets:
                    if sheet_title.startswith(sheet_info[0]):
                        matrix = sheetToRows(values)
                        newmatrix = []
                        prefix = None  #special case-make this the first value for all records in worksheet
                        if sheet_title.startswith('Members'):
                            prefix = sheet_title[-5:-2]
                        for row in matrix:
                            if len(row) > 0 and not row[1].startswith(
                                    "This information is being made available"
                            ):
                                newrow = []
                                if prefix:
                                    newrow.append(prefix)
                                for i in sheet_info[2]:
                                    if sheet_info[
                                            1] == 'congcmte_posts' and i == 4 and len(
                                                row) < 5:
                                        thisval = ''
                                    else:
                                        thisval = row[i]
                                    try:
                                        newrow.append(thisval)
                                    except:
                                        logging.info(str(row) + " failed")
                                newmatrix.append(newrow)
                        #get rid of headers
                        if sheet_info[1] in [
                                'members', 'categories', 'congcmtes',
                                'congcmte_posts'
                        ]:
                            newmatrix = newmatrix[1:]
                        writerows(newmatrix, sheet_info[1])
示例#39
0
def parse_file(fname):
    rv = []
    for name, sheet in pxls.parse_xls(fname, encoding=None):
        rv.append((name, parse_sheet(sheet)))
    return rv
            # XXX maybe do something better than just omit it?
            sys.stderr.write(("in sheet %s, field %d " +
                              "follows field %d; omitting this sheet\n") % (
                sheetname, value, current_field_number))
            return
        current_field_number = value
        field_names.append(cells[row, 1])
    print ';'.join([sheetname.replace('Sch ', 'S').upper()] + field_names)

def sheets_having_col_seq(sheets):
    """Sheets describing a record format say COL SEQ in the first column,
    in the first few rows.  This generator finds those sheets.

    """
    for name, cells in sheets:
        for row in range(20):
            if cells.get((row, 0)) == 'COL':
                assert cells.get((row+1, 0)) == 'SEQ'
                yield name, cells
                break
        else:
            sys.stderr.write("sheet %s has no COL SEQ; omitting this sheet\n"
                             % name)

def get_all_field_names(sheets):
    """Outputs the field names for all record types."""
    for sheet in sheets_having_col_seq(sheets): get_field_names(sheet)

if __name__ == '__main__':
    get_all_field_names(pyExcelerator.parse_xls(sys.argv[1]))
示例#41
0
def ParseXlsData(filename):
    return xlst.parse_xls(filename)
示例#42
0
def parse_cbs_data(args):
    if not os.access(args.file, os.R_OK):
        if args.verbose:
            print("Error: Cannot read CBS data file: "+ args.file)
        return

    if args.verbose:
        print("Parsing XLS file: %s" % args.file)

    workbook = pyExcelerator.parse_xls(args.file)

    cbs_data = hash()

    for sheet_name, values in workbook:
        value = values[(0, 0)]

        if not value:
            if args.verbose:
                print("Warning: No value in cell 0,0, skipping worksheet: %s" % sheet_name)

            continue

        header = []
        for i in range(0, 6):
            if not values.has_key((0, i)):
                break

            value = values[(0, i)]

            header.append(value)

        column = {}

        # Gemeentecode	Gemeentenaam	Provincienaam	Provinciecode
        if(
            header[0] == 'Gemeentecode'  and
            header[1] == 'Gemeentenaam'  and
            header[2] == 'Provincienaam' and
            header[3] == 'Provinciecode'
          ):
                column = {
                           'gemeentecode'  : 0,
                           'gemeentenaam'  : 1,
                           'provincienaam' : 2,
                           'provinciecode' : 3,
                         }

        # Gemeentecode	Provinciecode	Gemeentenaam	Provincienaam
        elif(
              header[0] == 'Gemeentecode'  and
              header[1] == 'Provinciecode' and
              header[2] == 'Gemeentenaam'  and
              header[3] == 'Provincienaam'
            ):
                column = {
                           'gemeentecode'  : 0,
                           'provinciecode' : 1,
                           'gemeentenaam'  : 2,
                           'provincienaam' : 3,
                         }

        # prov_Gemcode	provcode	Gemcodel	provcodel
        elif(
              header[0] == 'prov_Gemcode' and
              header[1] == 'provcode'     and
              header[2] == 'Gemcodel'     and
              header[3] == 'provcodel'
            ):
                column = {
                           'gemeentecode'  : 0,
                           'provinciecode' : 1,
                           'gemeentenaam'  : 2,
                           'provincienaam' : 3,
                         }

        # Gemcode	provcode	Gemcodel	provcodel
        elif(
              header[0] == 'Gemcode'  and
              header[1] == 'provcode' and
              header[2] == 'Gemcodel' and
              header[3] == 'provcodel'
            ):
                column = {
                           'gemeentecode'  : 0,
                           'provinciecode' : 1,
                           'gemeentenaam'  : 2,
                           'provincienaam' : 3,
                         }

        # Gemcode	Gemcodel	provcode	provcodel
        elif(
              header[0] == 'Gemcode'  and
              header[1] == 'Gemcodel' and
              header[2] == 'provcode' and
              header[3] == 'provcodel'
            ):
                column = {
                           'gemeentecode'  : 0,
                           'gemeentenaam'  : 1,
                           'provinciecode' : 2,
                           'provincienaam' : 3,
                         }

        # Gemeentecode	GemeentecodeGM	Gemeentenaam	Provinciecode	ProvinciecodePV	Provincienaam
        elif(
              header[0] == 'Gemeentecode'    and
              header[1] == 'GemeentecodeGM'  and
              header[2] == 'Gemeentenaam'    and
              header[3] == 'Provinciecode'   and
              header[4] == 'ProvinciecodePV' and
              header[5] == 'Provincienaam'
            ):
                column = {
                           'gemeentecode'    : 0,
                           'gemeentecodegm'  : 1,
                           'gemeentenaam'    : 2,
                           'provinciecode'   : 3,
                           'provinciecodepv' : 4,
                           'provincienaam'   : 5,
                         }

        # Unsupported format
        else:
            if args.verbose:
                order = ""

                i = 0;
                for col in header:
                    if i > 0:
                        print(" | ")

                    print(col)

                    i += 1;

                print("Error: Unsupported header order: %s" % order)

            return

        row_max = len(values)/len(column)

        for row in range(1, row_max):
            record = {};

            columns = ['gemeentecode', 'gemeentenaam', 'provinciecode', 'provincienaam']
            for key in columns:
                value = values[(row, column[key])]

                record[key] = value

            if(
                not record.has_key('gemeentecode') and
                not record.has_key('gemeentenaam') and
                not record.has_key('provinciecode') and
                not record.has_key('provincienaam')
              ):
                if args.verbose:
                    print("Empty row, stoppping here.")
                break

            if not record.has_key('gemeentecode'):
                if args.verbose:
                    print("Empty 'gemeentecode' column (%s), stoppping here." % column['gemeentecode'])
                break
            if not record.has_key('gemeentenaam'):
                if args.verbose:
                    print("Empty 'gemeentenaam' column (%s), stoppping here." % column['gemeentenaam'])
                break
            if not record.has_key('provinciecode'):
                if args.verbose:
                    print("Empty 'provinciecode' column (%s), stoppping here." % column['provinciecode'])
                break
            if not record.has_key('provincienaam'):
                if args.verbose:
                    print("Empty 'provincienaam' column (%s), stoppping here." % column['provincienaam'])
                break

            columns = ['gemeentecode', 'provinciecode']
            for key in columns:
                record[key] = strip_leading_zeros(record[key]);

            cbs_data[record['provinciecode']][record['gemeentecode']] = record;

        break

    return cbs_data
示例#43
0
 def handleSubmit(self, action):
     """submit was pressed"""
     if 'xlsdata' in self.widgets:
         fields = self.allFields
         codepage = self.widgets['codepage'].value[0]
         fileWidget = self.widgets['xlsdata']
         fileUpload = fileWidget.extract()
         filename = datetime.now().strftime('in_%Y%m%d%H%M%S.xls')
         f_handle, f_name = tempfile.mkstemp(filename)
         outf = open(f_name, 'wb')
         outf.write(fileUpload.read())
         outf.close()
         parseRet = xl.parse_xls(f_name, codepage)
         os.remove(f_name)
         #
         allAttributes = {}
         for interface in implementedBy(self.factory):
             for i_attrName in interface:
                 i_attr = interface[i_attrName]
                 if IField.providedBy(i_attr):
                     allAttributes[i_attrName] = i_attr
         #
         for sheet_name, values in parseRet:
             matrix = [[]]
             for row_idx, col_idx in sorted(values.keys()):
                 v = values[(row_idx, col_idx)]
                 if isinstance(v, unicode):
                     v = u"%s" % v  # v.encode(codepage, 'backslashreplace')
                 else:
                     v = ` v `
                 v = u'%s' % v.strip()
                 last_row, last_col = len(matrix), len(matrix[-1])
                 while last_row <= row_idx:
                     matrix.extend([[]])
                     last_row = len(matrix)
                 while last_col < col_idx:
                     matrix[-1].extend([''])
                     last_col = len(matrix[-1])
                 matrix[-1].extend([v])
             attrNameList = matrix[0]
             attrValMatrix = matrix[1:]
             for attrValVector in attrValMatrix:
                 attrDict = {}
                 for attrIndex, attrVal in enumerate(attrValVector):
                     attrDict[attrNameList[attrIndex]] = attrVal
                 # ---------------------------------------
                 if attrDict.has_key('IntID'):
                     attrDict.pop('IntID')
                 if attrDict.has_key('objectID'):
                     attrObjectID = attrDict.pop('objectID')
                     oldObj = self.context[attrObjectID]
                     for attrName, newValString in attrDict.items():
                         #print u"ddd4-> %s" % (attrName)
                         attrField = allAttributes[attrName]
                         #print u"type(%s): %s" % (attrField, type(attrField))
                         #                            if attrName == "rooms":
                         if IChoice.providedBy(attrField):
                             v_widget = getMultiAdapter(\
                                             (attrField,self.request),
                                             interfaces.IFieldWidget)
                             v_widget.context = oldObj
                             v_dataconverter = queryMultiAdapter(\
                                             (attrField, v_widget),
                                             interfaces.IDataConverter)
                             if len(newValString) > 0:
                                 newVal = v_dataconverter.toFieldValue(
                                     [newValString])
                             else:
                                 newVal = v_dataconverter.toFieldValue([])
                         else:
                             if attrName == "isTemplate":
                                 v_widget = checkbox.SingleCheckBoxFieldWidget(\
                                             attrField,self.request)
                             else:
                                 v_widget = getMultiAdapter(\
                                                 (attrField,self.request),
                                                 interfaces.IFieldWidget)
                             v_widget.context = oldObj
                             v_dataconverter = queryMultiAdapter(\
                                             (attrField, v_widget),
                                             interfaces.IDataConverter)
                             if ICollection.providedBy(attrField):
                                 if len(newValString) > 0:
                                     newVal = v_dataconverter.toFieldValue(
                                         newValString.split(';'))
                                 else:
                                     newVal = v_dataconverter.toFieldValue(
                                         [])
                             else:
                                 newVal = v_dataconverter.toFieldValue(
                                     newValString)
                         if getattr(oldObj, attrName) != newVal:
                             setattr(oldObj, attrName, newVal)
                             dcore = IWriteZopeDublinCore(oldObj)
                             dcore.modified = datetime.utcnow()
                             if attrName == "ikName":
                                 IBrwsOverview(oldObj).setTitle(newVal)
                 else:
                     oldObj = None
                     # new Object
                     #                        newObj = createObject(self.factoryId)
                     #                        newObj.__post_init__()
                     dataVect = {}
                     for attrName, newValString in attrDict.items():
                         attrField = allAttributes[attrName]
                         if IChoice.providedBy(attrField):
                             v_widget = getMultiAdapter(\
                                             (attrField,self.request),
                                             interfaces.IFieldWidget)
                             v_dataconverter = queryMultiAdapter(\
                                             (attrField, v_widget),
                                             interfaces.IDataConverter)
                             if len(newValString) > 0:
                                 newVal = v_dataconverter.toFieldValue(
                                     [newValString])
                             else:
                                 newVal = v_dataconverter.toFieldValue([])
                         else:
                             if attrName == "isTemplate":
                                 v_widget = checkbox.SingleCheckBoxFieldWidget(\
                                             attrField,self.request)
                             else:
                                 v_widget = getMultiAdapter(\
                                                 (attrField,self.request),
                                                 interfaces.IFieldWidget)
                             v_dataconverter = queryMultiAdapter(\
                                             (attrField, v_widget),
                                             interfaces.IDataConverter)
                             if ICollection.providedBy(attrField):
                                 if len(newValString) > 0:
                                     newVal = v_dataconverter.toFieldValue(
                                         newValString.split(';'))
                                 else:
                                     newVal = v_dataconverter.toFieldValue(
                                         [])
                             else:
                                 newVal = v_dataconverter.toFieldValue(
                                     newValString)
                         dataVect[str(attrName)] = newVal
                         #setattr(newObj, attrName, newVal)
                     #self.context.__setitem__(newObj.objectID, newObj)
                     #print "dataVect: ", dataVect
                     newObj = self.factory(**dataVect)
                     newObj.__post_init__()
                     if oldObj is not None:
                         dcore = IWriteZopeDublinCore(oldObj)
                         dcore.modified = datetime.utcnow()
                     IBrwsOverview(newObj).setTitle(dataVect['ikName'])
                     self.context[newObj.objectID] = newObj
                     if hasattr(newObj, "store_refs"):
                         newObj.store_refs(**dataVect)
                     notify(ObjectCreatedEvent(newObj))
     url = absoluteURL(self.context, self.request)
     self.request.response.redirect(url)