Пример #1
0
 def Create_Specification(self,
                          file_name,
                          mark_pos=1,
                          name_pos=2,
                          cnt_pos=3,
                          page_name='Перечень элементов'):
     '''
     Build a specification sheet from an element-list workbook.

     Rows that share the same element name (column ``name_pos``) are merged:
     their reference designators (column ``mark_pos``) are joined with
     commas, their quantities (column ``cnt_pos``) are summed, and the
     duplicate rows are emptied.  The merged rows are written back to the
     workbook as a new sheet named "Спецификация".

     :param file_name: path of the xls/xlsx file to read and update
     :param mark_pos: column index of the reference designator
     :param name_pos: column index of the element name
     :param cnt_pos: column index of the quantity
     :param page_name: name of the sheet holding the element list
     :return: None; success and errors are reported via Qt message boxes
     '''
     try:
         data_rx = get_data(file_name)
         try:
             elements_list = self.Del_Spaces(data_rx[page_name])
         except KeyError:
             QtWidgets.QMessageBox.critical(self, 'Ошибка',
                                            'Нет такой страницы в файле',
                                            QtWidgets.QMessageBox.Ok)
             return
         Spec = []
         scan_pos = 0
         # iterate over every row of the table
         for row_to_scan in elements_list:
             # check the row is long enough to contain the name column
             # max_pos = max([mark_pos, name_pos, cnt_pos])
             if len(row_to_scan) > name_pos:
                 Name = row_to_scan[name_pos]
                 if Name != '':
                     # scan all remaining rows for duplicates of this name
                     for row in elements_list[scan_pos + 1:]:
                         # only consider rows long enough to hold the name
                         if len(row) > name_pos:
                             if row[name_pos] == Name:
                                 # merge designators and quantities, then
                                 # clear the duplicate so it is skipped later
                                 row_to_scan[
                                     mark_pos] += ',' + row[mark_pos]
                                 row_to_scan[cnt_pos] += row[cnt_pos]
                                 row.clear()
                     if row_to_scan:
                         Spec.append(row_to_scan)
             else:
                 # short row: kept only when the previous row is non-empty
                 # NOTE(review): when scan_pos == 0 this indexes [-1] (the
                 # LAST row of the sheet) -- looks unintended, confirm.
                 if len(elements_list[scan_pos - 1]):
                     Spec.append(row_to_scan)
             scan_pos += 1
         # re-read the file and append the specification sheet to it
         data_rx = get_data(file_name)
         data_rx.update({"Спецификация": Spec})
         save_data(file_name, data_rx)
         QtWidgets.QMessageBox.information(
             self, 'Информация', 'Спецификация успешно сформирована',
             QtWidgets.QMessageBox.Ok)
     except Exception as EXP:
         QtWidgets.QMessageBox.critical(
             self, 'Ошибка обработки перечня элементов', str(EXP),
             QtWidgets.QMessageBox.Ok)
def readDatabase(rootDirectory):
    """Load the image dataset described by the single .xls file in *rootDirectory*.

    The first sheet of the spreadsheet lists one image per row: column 0 is
    the image file name and column 2 a class label in {0, 1, 2, 3}.  Each
    image is read with OpenCV, pre-processed, and its label one-hot encoded.

    :param rootDirectory: directory containing the images and one .xls index
    :return: [imageList, targetList] -- float32 arrays of shape
             (N, 3, 512, 512) and (N, 4)
    """
    imageList = []
    targetList = []

    print(rootDirectory)
    xlsName = glob.glob(rootDirectory + "/*.xls")
    data = get_data(xlsName[0])
    # dict.items() is a non-subscriptable view in Python 3 -- materialise it
    # first (the original `data.items()[0]` only worked on Python 2).
    # Skip the header row with [1:].
    xlsList = list(data.items())[0][1][1:]

    # one-hot encoding for the four class labels
    one_hot = {0: [1, 0, 0, 0], 1: [0, 1, 0, 0],
               2: [0, 0, 1, 0], 3: [0, 0, 0, 1]}

    for row in xlsList:
        img = cv2.imread(rootDirectory + '/' + str(row[0]))
        img = preProcess.preProcessImage(img)

        imageList.append(img.astype(numpy.float32))
        if row[2] in one_hot:
            targetList.append(one_hot[row[2]])

    numImages = len(xlsList)

    imageList = numpy.reshape(imageList, (numImages, 3, 512, 512))
    targetList = numpy.float32(targetList)
    targetList = numpy.reshape(targetList, (numImages, 4))

    return [imageList, targetList]
Пример #3
0
def read_xls_file():
    """Read data.xlsx and download the mp3 referenced by each data row.

    Keeps the rows of the workbook's last sheet, skips the header row, and
    for every remaining row downloads the URL in column 15 into
    mp3/<column 0>.mp3.
    """
    workbook = get_data(r"data.xlsx")
    for sheet_name in workbook:
        rows = workbook[sheet_name]
    for row in rows[1:]:
        print(row[0])
        download_little_file(row[15], 'mp3/' + row[0] + '.mp3')
def fetchingdata(path, c):
	"""Insert crimes-against-women counts for 2001 and 2011 into CRIME.

	Reads crimes_count_women_01n11_part2.xls from *path*, skips the header
	row, and inserts two CRIME rows per spreadsheet row via cursor *c*:
	one for 2001 (count in column 2) and one for 2011 (count in column 3).

	NOTE(review): Python 2 only -- backtick repr() syntax and the print
	statement.  String-built SQL is injection-prone; parameterised
	queries would be safer.  Also, pyexcel's get_data usually returns a
	dict keyed by sheet name; iterating `ra` directly as rows may need
	`ra[sheet]` -- confirm which `ex.get_data` this is.
	"""
	filename = 'crimes_count_women_01n11_part2.xls'
	ra = ex.get_data(path + '/' + filename)
	flag = False # leaving the headers

	no = 0
	c_id = 1
	g_id01 = 1  # geography id used for the 2001 rows
	g_id11 = 37  # geography id used for the 2011 rows

	for row in ra:
		if flag == False:
			flag = True
		else:
			state = row[0].encode('utf-8')
			type_crime = row[1].encode('utf-8')

			# `expr` is Python 2 shorthand for repr(expr)
			query = 'INSERT INTO CRIME VALUES(\'' + str(c_id) + '\',\'' + str(g_id01) + '\',\'' + str(g_id01) + '\',\'' + state + '\',\'' + type_crime + '\',' + `2001` + ',' + `row[2]` + ')'
			c.execute(query)
			c_id += 1

			query = 'INSERT INTO CRIME VALUES(\'' + str(c_id) + '\',\'' + str(g_id11) + '\',\'' + str(g_id11) + '\',\'' + state + '\',\'' + type_crime + '\',' + `2011` + ',' + `row[3]` + ')'
			c.execute(query)
			c_id += 1

			# NOTE(review): g_id01 is never incremented, so this reset can
			# only fire if it starts at 36 -- confirm the intended logic.
			if g_id01 == 36:
				g_id01 = 1
				g_id11 = 37

			no += 2

	print no , "Rows inserted."
def read_xls_file_print(fil):
    """Read an xls file and print its contents.

    *fil* is the absolute path of the file to read.  Prints the type of the
    loaded workbook, then each sheet name followed by that sheet's rows.
    """
    workbook = get_data(fil)
    print("数据格式:", type(workbook))
    for name in workbook:
        print(name, ":", workbook[name])
Пример #6
0
def fetchingdata(path, c):
    """Insert the 2011 population census into the POPULATION table.

    Reads population_census2011.xls from *path*, skips the header row and,
    for each row, inserts (p_id, state, year, sex_ratio, population,
    change_factor) via cursor *c*.  Failed inserts are reported and skipped.

    NOTE(review): Python 2 only (backtick repr syntax, print statement);
    string-built SQL is injection-prone -- prefer parameterised queries.
    """
    filename = 'population_census2011.xls'
    ra = ex.get_data(path + '/' + filename)
    flag = False  # leaving the headers
    p_id = 36  # place id; presumably continues a sequence started elsewhere
    no = 0

    for row in ra:
        if flag == False:
            flag = True
        else:
            state = row[0].encode('utf-8')
            population = int(row[1])
            change_factor = float(row[2])
            sex_ratio = int(row[3])
            year = 2011
            try:
                # ` expr ` is Python 2 shorthand for repr(expr)
                query = 'INSERT INTO POPULATION VALUES(\'' + str(
                    p_id
                ) + '\',\'' + state + '\',' + ` year ` + ',' + ` sex_ratio ` + ',' + ` population ` + ',' + ` change_factor ` + ')'
                c.execute(query)
            except:
                # best-effort: report the failing row and keep going
                print state, p_id
            no += 1
            p_id += 1
    print no, "Rows inserted."
def readXlsAndXlsxFile(path):
    """Read an xls/xlsx workbook and return its sheets as an OrderedDict.

    Keys are sheet names, values are the sheet contents, preserving the
    workbook's sheet order.
    """
    workbook = get_data(path)
    return OrderedDict((name, workbook[name]) for name in workbook)
Пример #8
0
def get_config(path):
    """Walk *path*, parse every visible spreadsheet, and fill the global
    `config` dict.

    Files whose names start with '.' are skipped.  For each sheet,
    data[k][1] is used as the column-header row, the first two rows are
    skipped, and each remaining row is keyed by its first cell with a
    {header: value} mapping.  Results are stored under the file name with
    trailing ".xlsx" characters stripped.  Also increments global `count`.

    NOTE(review): Python 2 only (print statement).  `rstrip(".xlsx")`
    strips any trailing '.', 'x', 'l', 's' characters, not the suffix --
    e.g. "box.xlsx" becomes "bo".  Confirm whether that is intended.
    """
    global count
    for dir_path, dir_names, file_names in os.walk(path):
        for file in file_names:

            # skip hidden files such as .DS_Store
            if (0 == file.find(".")):
                continue

            count = count + 1
            print ">> " + file
            data = get_data(dir_path + "/" + file)

            for k in data:
                if len(data[k]):
                    tab = data[k][1]  # header row used as column names
                    n = -1
                    cell = {}

                    for index in data[k]:
                        n = n + 1
                        # skip the first two rows (title + header)
                        if n < 2:
                            continue
                        cell[index[0]] = {}
                        m = 0
                        for i in index:
                            cell[index[0]][tab[m]] = i
                            m = m + 1
                    config[file.rstrip(".xlsx")] = cell
Пример #9
0
def read_xls_file():  
    """Read CorpusWordlist.xls and send question combinations to a service.

    For each sheet, rows with more than two cells are kept and their second
    column extracted.  Values of length <= 2 are collected as components,
    combined with getCombination(), and each combination is passed to
    sendRequest().

    NOTE(review): Python 2 only (print statement).  Longer questions fall
    into a branch whose actions are commented out, so they are currently
    dropped -- confirm intent.
    """
    xls_data = get_data(r"CorpusWordlist.xls")  
    print "Get data type:", type(xls_data)  

    for sheet_n in xls_data.keys():  
        #print sheet_n, ":", xls_data[sheet_n]  
        #print sheet_n, ":", type(xls_data[sheet_n]) 
        #print sheet_n, ":", len(xls_data[sheet_n]) 
        #print sheet_n, ":", xls_data[sheet_n][7]
        # keep rows with more than two cells, then take their second column
        filter_list = list(filter((lambda each_line: len(each_line) > 2),xls_data[sheet_n]))
        map_list = map((lambda line: line[1]),filter_list)

        component_list = []
        for question in map_list:
            print ("question len is %d" % len(question))
            if len(question) > 2:
                from time import sleep
                #sleep(0.2)
                #sendRequest(question)
                #sys.exit(0)
            else:
                component_list.append(question) 
        #print map_list
        combination_list = getCombination(component_list)
        for q in combination_list:
             sendRequest(q)
Пример #10
0
def get_hotel_tripadvisor_link(path):
    """Find a TripAdvisor link for every hotel in the workbook at *path*.

    Loads the 'hotel_info' and 'review_info' sheets (round-tripped through
    JSON to normalise cell types), pads each hotel row to 9 columns, crawls
    a TripAdvisor search for "<columns 2..4> tripadvisor", stores the found
    link in column 7 (appending when the row is short), and saves both
    sheets back to the same file.

    NOTE(review): Python 2 only (xrange, print statement).
    """
    result = OrderedDict()
    xls = get_data(path)
    data = json.dumps(xls, default=json_serial)
    lines = json.loads(data, object_hook=json_util.object_hook)['hotel_info']
    lines2 = json.loads(data, object_hook=json_util.object_hook)['review_info']
    # lines3=json.loads(data, object_hook=json_util.object_hook)['reviewer_info']
    # lines[0].append('Link')
    for i in xrange(1, len(lines)):
        # pad the hotel row to at least 9 columns
        while len(lines[i]) < 9:
            lines[i].append('')
        q = [str(x) for x in lines[i]]
        # print q
        # try:
        link = url_crawler(q[0],
                           ' '.join(q[2:5]) + " tripadvisor").start_crawl()
        print link
        try:
            # column 7 holds the link; append if the row is still short
            lines[i][7] = link
        except:
            lines[i].append(link)
        # time.sleep(1)
        # except:
        #     # link=''
        #     # print 'fail'
        #     break

    # # print lines[:10]

    result.update({"hotel_info": lines})
    result.update({"review_info": lines2})
    # result.update({"reviewer_info":lines3})
    save_data(path, result)
Пример #11
0
    def parse(self, stream, media_type=None, parser_context=None):
        """
        Parse the incoming bytestream as XLS and return the resulting data.

        The workbook must contain exactly one sheet named `Data`; its first
        row supplies the column headers.  Every following row becomes a
        dict of header -> JSON-decoded cell value, with headers missing
        from a short row filled in as empty strings.
        """
        workbook = dict(get_data(stream))
        sheet = list(workbook.keys())[0] if len(workbook) == 1 else None

        if sheet is None or sheet != 'Data':
            raise ParseError(
                'XLS parse error - spreadsheet should contain one sheet named `Data`'
            )

        rows = workbook[sheet]
        headers = rows[0]
        parsed = []

        try:
            for raw in rows[1:]:
                record = {k: self._json_loads(v) for k, v in zip(headers, raw)}
                # pad out any header columns the row did not provide
                missing = set(headers).difference(set(record.keys()))
                record.update({key: "" for key in missing})
                parsed.append(record)

        except ValueError as e:
            raise ParseError(
                f"XLS parse error - invalid data in spreadsheet {getattr(e, 'message', e)}"
            )

        return parsed
Пример #12
0
def import_excel_file(self, *args, **kwargs):
    """Import vendor documents from static/test_documents_upload.xls.

    Walks every data row of the 'Documents' sheet (after the header),
    skips rows whose doc/posting date lies in the future or whose amount
    is negative, and creates a VendorModel record for each remaining row.
    Returns "exists" as soon as a document number is already present,
    False on any other error, True on success.
    """
    sheet_rows = dict(get_data("static/test_documents_upload.xls"))['Documents']
    today = date.today()
    for idx in range(1, len(sheet_rows[1:]) + 1):
        row = sheet_rows[idx]
        try:
            try:
                VendorModel.objects.get(document_number=row[1])
                return "exists"
            except ObjectDoesNotExist:
                # skip documents dated in the future
                if row[4] > today or row[5] > today:
                    continue
                # skip negative amounts
                if row[6] < 0:
                    continue
                VendorModel.objects.create(invoice_number=row[0],
                                           document_number=row[1],
                                           type_of_invoice=row[2],
                                           net_due_date=row[3],
                                           doc_date=row[4],
                                           pstng_date=row[5],
                                           amount=row[6],
                                           vendor_code=row[7],
                                           vendor_name=row[8],
                                           vendor_type=row[9])
        except Exception as ex:
            print(ex)
            return False
    print("Success")
    return True
Пример #13
0
def showTable(filename):
    """Render show.html for the uploaded spreadsheet *filename*.

    Loads the workbook from the upload folder, keeps the rows of its last
    sheet, and parses header cells after column 4 as "name-colspan" pairs
    to build the table description handed to the template.
    """
    upload_dir = os.path.join(app.root_path, app.config['UPLOAD_FOLDER'])
    workbook = get_data(upload_dir + "/" + filename)

    for sheet_name in workbook.keys():
        xls_title = workbook[sheet_name][0]
        xls_data = workbook[sheet_name]

    # header description from columns 5+ ("name-colspan" cells)
    title = {}
    itemsCount = 0
    for idx, cell in enumerate(xls_data[0]):
        if idx > 4:
            parts = cell.rsplit('-', 1)
            title[idx] = {'name': parts[0], 'col': int(parts[1])}
            itemsCount = idx

    data = {
        'title': title,
        'attrs': xls_data[1:],
        'count': len(xls_data[1:]),
        'itcount': itemsCount + 1,
    }

    return render_template('show.html', data=data)
Пример #14
0
def getData(filepath, sheet_num=0):
    """Return one sheet's rows from the workbook at *filepath*.

    With sheet_num == 0 the first sheet is returned; otherwise sheet_num
    is treated as a 1-based index (so sheet_num == 1 is also the first
    sheet).
    """
    workbook = get_data(filepath)
    names = list(workbook)
    if sheet_num == 0:
        return workbook[names[0]]
    return workbook[names[sheet_num - 1]]
Пример #15
0
def test_merged_cells():
    """Merged cells expand into every covered cell when
    detect_merged_cells is enabled."""
    book = get_data(
        get_fixture("merged-cell-sheet.xls"),
        detect_merged_cells=True,
        library="pyexcel-xls")
    eq_(book['Sheet1'],
        [[1, 2, 3], [1, 5, 6], [1, 8, 9], [10, 11, 11]])
Пример #16
0
def crawl_hotel_reviewer_name(path, reviewer_path):
    """Fill reviewer ids and names into the review sheet of *path*.

    Loads the 'hotel_info' and 'review_info' sheets (round-tripped through
    JSON to normalise cell types), then for every review follows the URL in
    column 5 with a webdriver, writes the crawled reviewer id/name into
    columns 1 and 2, and finally saves both sheets back to the same file.
    Crawl failures restart the webdriver and move on to the next review.

    NOTE(review): Python 2 only (print statement).  *reviewer_path* is
    currently unused -- all reviewer-sheet handling is commented out.
    """
    print path
    result = OrderedDict()
    xls = get_data(path)
    data = json.dumps(xls, default=json_serial)
    # print data
    lines = json.loads(data, object_hook=json_util.object_hook)['hotel_info']
    reviews = json.loads(data,
                         object_hook=json_util.object_hook)['review_info']

    # reviewer_result = OrderedDict()
    # reviewer_xls = get_data(reviewer_path)
    # reviewer_data = json.dumps(reviewer_xls, default=json_serial)
    # reviewers=json.loads(reviewer_data, object_hook=json_util.object_hook)['reviewer_info']

    # reviewers_id_set=[reviewer[1] for reviewer in reviewers[1:] if len(reviewer) > 1]
    reviewers_id_set = []
    # print reviewers_id_set
    # exit(0)

    driver = get_webdriver()

    # skip the header row of the review sheet
    for index, review in enumerate(reviews[1:]):
        print index
        #  review[5],review[8]
        try:
            url = review[5]
            reviewer_data = hotel_reviewer_name_crawler(
                driver, url, reviewers_id_set)
            reviewer_id = reviewer_data[0]
            reviewer_name = reviewer_data[1]
            reviewer_exist = reviewer_data[2]
            print "Get reviewer: ", reviewer_name
            # if not reviewer_id or not reviewer_name:
            #     print 'Passed'
            #     continue

            # index is 0-based over reviews[1:], so +1 maps back to the row
            reviews[index + 1][1] = reviewer_id
            reviews[index + 1][2] = reviewer_name

            if not reviewer_exist:
                print "Add new reviewer: ", reviewer_id, reviewer_name
                temp = []
                temp.append(reviewer_id)
                temp.append(reviewer_name)
                temp.append(url)

                # print len(reviewers)
                # reviewers.append(temp)
                # print len(reviewers)

        except Exception as e:
            # crawl failed: restart the browser and continue
            print e
            driver = get_new_webdriver(driver)

    closeDriver(driver)

    result.update({"hotel_info": lines})
    result.update({"review_info": reviews})
    save_data(path, result)
Пример #17
0
def read_excel(path):
    """Read an xls/xlsx workbook into a PaperData of text tables.

    Every sheet becomes one table: each cell is cleaned text (non-string
    cells become empty), and all rows are padded with empty cells to the
    width of the longest row.  On any failure the problem is logged and an
    empty PaperData is returned.
    """
    try:
        workbook = pyexcel_xls.get_data(path)

        tables = []
        for _, sheet in workbook.items():
            cells = []
            widest = 0
            for raw_row in sheet:
                row_cells = [
                    {'text': clean_text(cell if isinstance(cell, str) else '')}
                    for cell in raw_row
                ]
                cells.append(row_cells)
                widest = max(widest, len(row_cells))

            # pad every row to the width of the widest one
            for row_cells in cells:
                row_cells.extend({'text': ''} for _ in range(widest - len(row_cells)))
            tables.append({'cells': cells})
        body = ''

        data = PaperData(body, tables)
    except Exception:
        logger.info('fail: %s', path)
        traceback.print_exc()
        return PaperData()

    return data
Пример #18
0
def fetchingdata(path, c):
    """Insert 2001 police strength figures into the POLICE table.

    Reads police_2001_strength_xls.xls from *path*, skips the header row
    and, for each row, inserts (pol_id, p_id, state, year, per100,
    strength) via cursor *c*.

    NOTE(review): Python 2 only (backtick repr syntax, print statement);
    string-built SQL is injection-prone -- prefer parameterised queries.
    """
    filename = 'police_2001_strength_xls.xls'
    ra = ex.get_data(path + '/' + filename)
    flag = False  # leaving the headers
    p_id = 1
    pol_id = 1
    no = 0

    for row in ra:
        if flag == False:
            flag = True
        else:
            state = row[0].encode('utf-8')
            strength = int(row[1])
            per100 = float(row[2])
            year = 2001
            # ` expr ` is Python 2 shorthand for repr(expr)
            query = 'INSERT INTO POLICE VALUES(\'' + str(
                pol_id
            ) + '\',\'' + str(
                p_id
            ) + '\',\'' + state + '\',' + ` year ` + ',' + ` per100 ` + ',' + ` strength ` + ')'
            c.execute(query)
            no += 1
            p_id += 1
            pol_id += 1
    print no, "Rows inserted."
Пример #19
0
def test_exploration():
    """Sheets with merged regions expand to their full rectangles."""
    book = get_data(
        get_fixture("merged-sheet-exploration.xls"),
        detect_merged_cells=True,
        library="pyexcel-xls",
    )
    eq_(book["Sheet1"], [[1] * 6] + [[2] for _ in range(9)])
    eq_(book["Sheet2"],
        [[3], [3], [3], [3, 4, 4, 4, 4, 4, 4], [3], [3], [3]])
    eq_(book["Sheet3"],
        [["", "", "", "", "", 2, 2, 2], [], [], []] +
        [["", "", "", 5] for _ in range(5)])
Пример #20
0
def read_xls_file(path):
    """Read the workbook at *path* and return a {sheet_name: rows} dict."""
    workbook = get_data(path)
    # copy every sheet's contents into a plain dict keyed by sheet name
    return {name: workbook[name] for name in workbook.keys()}
Пример #21
0
 def get_gude(self, fpath):
     """Load GuDe rows from Sheet1 of *fpath* (header row skipped),
     append them to self.gude_rs, and return the accumulated list."""
     sheets = get_data(fpath)
     rows = sheets and sheets['Sheet1'] or []
     for raw in rows[1:]:
         self.gude_rs.append(GuDe(raw))
     return self.gude_rs
Пример #22
0
def get_red(exl_path='./ssq.xls', random_order=False):
    """Read red-ball numbers from an SSQ lottery spreadsheet.

    Each row of the 'data' sheet (from row index 2 onward) yields a numpy
    array of six zero-based ball numbers.  With random_order=False the
    sorted draw in columns 2-7 is used; with random_order=True the
    as-drawn ordering in columns 9-14 is used instead.  The two original
    copy-pasted branches differed only in the starting column, so that is
    now a single parameterised loop.

    :param exl_path: path of the xls file to read
    :param random_order: pick the unsorted (True) or sorted (False) columns
    :return: list of numpy int arrays of shape (6,)
    """
    xls_data = get_data(exl_path)
    rows = xls_data['data']
    # the two layouts differ only in the first of the six ball columns
    first_col = 9 if random_order else 2
    ssq_data = []
    for i in range(2, len(rows)):
        row = rows[i]
        # spreadsheet numbers are 1-based; shift to 0-based
        ssq_data.append(np.asarray(
            [row[col] - 1 for col in range(first_col, first_col + 6)]))
    return ssq_data
Пример #23
0
 def get_qingniu(self, fpath):
     """Load QingNiu rows from Sheet1 of *fpath* (first four rows
     skipped), append them to self.qingniu_rs, and return the list."""
     sheets = get_data(fpath)
     rows = sheets and sheets['Sheet1'] or []
     for raw in rows[4:]:
         self.qingniu_rs.append(QingNiu(raw))
     return self.qingniu_rs
Пример #24
0
def test_complex_hidden_sheets():
    """Hidden rows and columns are dropped when
    skip_hidden_row_and_column is set."""
    book = get_data(
        os.path.join("tests", "fixtures", "complex_hidden_sheets.xls"),
        skip_hidden_row_and_column=True,
    )
    eq_(book["Sheet1"],
        [[1, 3, 5, 7, 9], [31, 33, 35, 37, 39], [61, 63, 65, 67]])
Пример #25
0
def test_simple_hidden_sheets():
    """A simple workbook loses its hidden rows/columns when
    skip_hidden_row_and_column is set."""
    book = get_data(
        os.path.join("tests", "fixtures", "hidden.xls"),
        skip_hidden_row_and_column=True,
    )
    eq_(book["Sheet1"], [[1, 3], [7, 9]])
Пример #26
0
def get_config(path):
    """Walk *path*, parse every visible spreadsheet, and fill the global
    `config` dict.

    Files whose names start with '.' are skipped.  For each sheet,
    data[k][1] is used as the column-header row, the first two rows are
    skipped, and each remaining row is keyed by its first cell with a
    {header: value} mapping.  Results are stored under the file name with
    trailing ".xlsx" characters stripped.  Also increments global `count`.

    NOTE(review): Python 2 only (print statement).  `rstrip(".xlsx")`
    strips any trailing '.', 'x', 'l', 's' characters, not the suffix --
    e.g. "box.xlsx" becomes "bo".  Confirm whether that is intended.
    """
    global count
    for dir_path, dir_names, file_names in os.walk(path):
        for file in file_names:

            # skip hidden files such as .DS_Store
            if (0 == file.find(".")):
                continue

            count = count + 1
            print ">> " + file
            data = get_data(dir_path + "/" + file)

            for k in data:
                if len(data[k]):
                    tab = data[k][1]  # header row used as column names
                    n = -1
                    cell = {}

                    for index in data[k]:
                        n = n + 1
                        # skip the first two rows (title + header)
                        if n < 2:
                            continue
                        cell[index[0]] = {}
                        m = 0
                        for i in index:
                            cell[index[0]][tab[m]] = i
                            m = m + 1
                    config[file.rstrip(".xlsx")] = cell
Пример #27
0
def insert_value_by_mark(xls_filename,
                         row_mark,
                         col_mark,
                         value,
                         sheet_name=None):
    """Write *value* at the intersection of the row containing *row_mark*
    and the column containing *col_mark*.

    The cell position is located with pyexcel (first sheet when
    *sheet_name* is None); the actual write goes through an xlwings
    workbook so existing formatting is preserved.  Raises when either mark
    cannot be found.
    """
    import pyexcel_xls
    from tools_xls import find_row_col

    workbook_data = pyexcel_xls.get_data(xls_filename)
    if sheet_name is None:
        sheet_rows = workbook_data[next(iter(workbook_data))]
    else:
        sheet_rows = workbook_data[sheet_name]

    tar_rowi, _col = find_row_col(sheet_rows, row_mark)
    if tar_rowi is None:
        raise Exception(f"'{xls_filename}'内找不到{row_mark}")
    _row, tar_coli = find_row_col(sheet_rows, col_mark)
    if tar_coli is None:
        raise Exception(f"'{xls_filename}'内找不到{col_mark}")

    # workbook = xlwings.Book(xls_filename)
    workbook = app.books.open(xls_filename, update_links=True)
    if sheet_name is None:
        sheet: xlwings.Sheet = workbook.sheets[0]
    else:
        sheet: xlwings.Sheet = workbook.sheets[sheet_name]
    sheet[tar_rowi, tar_coli].value = value
    workbook.save()
    print(f'    {xls_filename} - {row_mark} - {col_mark} - {value} 写入成功')
Пример #28
0
def StarImgRead(path):
    """Read a star-image spreadsheet and return unit direction vectors.

    The focal length in pixels is derived from a 10-degree full field of
    view over a 512-pixel sensor.  Each row of Sheet1 carries pixel
    coordinates in columns 1 and 2; they are recentred on the optical axis
    and converted to unit vectors in camera coordinates.

    :param path: path of the star-image xls file
    :return: (U_list, star_list) -- unit vectors and recentred coordinates
    """
    # focal length in pixels, computed from half the field of view
    half_angle_rad = (10 / 2) * np.pi / 180
    half_size = 512 / 2
    focal = half_size / np.tan(half_angle_rad)

    # load the raw rows
    dataset = xls.get_data(path)['Sheet1']
    U_list = []
    star_list = []
    for row in dataset:
        # recentre the pixel coordinates on the optical axis
        # (the exact offset may need refinement, per the original author)
        x = row[1] - half_size
        y = half_size - row[2]
        # direction vector in camera coordinates, normalised to unit length
        direction = np.array([x, y, focal])
        U_list.append(direction / np.linalg.norm(direction))
        star_list.append([x, y])
    return U_list, star_list
Пример #29
0
def update_hotel_reviewer_score(path):
    """Crawl per-reviewer rating distributions into the reviewer sheet.

    Loads the 'hotel_info', 'review_info' and 'reviewer_info' sheets from
    the workbook at *path* (round-tripped through JSON to normalise cell
    types), pads each reviewer row to 18 columns, follows the profile link
    in column 2 with a webdriver, stores the five crawled scores in columns
    11-15, and saves all three sheets back to the same file.  Crawl
    failures are silently skipped for that reviewer.

    NOTE(review): Python 2 only (print statement).  The webdriver is never
    closed here -- confirm whether that is handled by the caller.
    """
    driver = get_webdriver()
    result = OrderedDict()
    xls = get_data(path)
    data = json.dumps(xls, default=json_serial)

    lines = json.loads(data, object_hook=json_util.object_hook)['hotel_info']
    reviews = json.loads(data,
                         object_hook=json_util.object_hook)['review_info']
    reviewers = json.loads(data,
                           object_hook=json_util.object_hook)['reviewer_info']
    # skip the header row of the reviewer sheet
    for index, reviewer in enumerate(reviewers[1:]):
        print reviewer
        # pad the row so columns 11-15 can be assigned
        while len(reviewer) < 18:
            reviewer.append('')
        if True:  #reviewer[11]!='' and reviewer[12]!='':
            link = reviewer[2]
            # print reviewer
            print link
            try:
                scores = get_reviewer_scores(driver, link)
                print scores
                # break

                reviewer[11] = scores[0]
                reviewer[12] = scores[1]
                reviewer[13] = scores[2]
                reviewer[14] = scores[3]
                reviewer[15] = scores[4]
            except:
                # best-effort: leave this reviewer's scores unchanged
                pass
    result.update({"hotel_info": lines})
    result.update({"reviewer_info": reviewers})
    result.update({"review_info": reviews})
    save_data(path, result)
Пример #30
0
    def excel_import(self):
        """Import records from a user-chosen .xls file into the sqlite DB.

        Opens a file-selection dialog, loads every non-empty sheet of the
        chosen workbook, and for each sheet inserts ONE row into `records`
        taken from that sheet's first data row (columns 1-3: website/
        barcode, status, createtime), refreshing the view after each
        insert.

        NOTE(review): only item[0] (the first row of each sheet) is ever
        inserted -- if whole sheets were meant to be imported, an inner
        per-row loop is missing.  Confirm intent.
        """
        importFileName, importFileType = QFileDialog.getOpenFileName(
            self, 'Openfile', './', 'xls Files (*.xls)')
        if len(importFileName) < 1:
            print('no file found')
        else:
            xls_data = get_data(importFileName)
            xls_list = []
            # collect all non-empty sheets
            for sheet_n in xls_data.keys():
                this_xls_item = xls_data[sheet_n]
                if len(this_xls_item) > 0:
                    xls_list.append(this_xls_item)
            if len(xls_list) < 1:
                print('xls is empty')
            else:
                print('xls is usable')
                for item in xls_list:

                    # first row of the sheet: id, website, status, createtime
                    item_id = item[0][0]
                    item_website = item[0][1]
                    item_status = item[0][2]
                    item_createtime = item[0][3]
                    insert_item_sql = """INSERT INTO records(`barcode`,`status`,`createtime`) VALUES(?,?,?)"""
                    try:
                        my_sqlite_db = Database()
                        my_sqlite_db.insert(
                            insert_item_sql,
                            [item_website, item_status, item_createtime])
                        print('save item ok')
                        self.query_data_from_db()
                    except Exception as e:
                        print(e)
Пример #31
0
 def get_xls(self, url, sheet):
     """Read an excel file and return the rows of the sheet named *sheet*.

     The loaded workbook is cached on self.data_xls.  Returns None when no
     sheet name matches.
     """
     self.data_xls = pyexcel_xls.get_data(url)
     for candidate in self.data_xls.keys():
         if str(candidate) == sheet:
             return self.data_xls[candidate]
Пример #32
0
    def shard(self, path, output_template):
        """Split the hotel sheet of the workbook at *path* into shard files.

        Loads 'hotel_info' and 'review_info' (round-tripped through JSON to
        normalise cell types) and writes one output file per hotel row:
        each shard keeps the hotel header plus a slice of `interval` rows
        and an empty (header-only) review sheet.  Output paths come from
        output_template.format(i).

        NOTE(review): Python 2 only (xrange, print statement).  `count` is
        incremented but unused -- shard names are keyed by row index `i`.
        """
        xls = get_data(path)
        data = json.dumps(xls, default=self.json_serial)
        lines = json.loads(data, object_hook=json_util.object_hook)['hotel_info']
        lines2 = json.loads(data, object_hook=json_util.object_hook)['review_info']
        # lines3 = json.loads(data, object_hook=json_util.object_hook)['reviewer_info']

        # header rows preserved in every shard
        head=lines[0]
        head2 = lines2[0]
        # head3 = lines3[0]

        interval=1
        start=1
        count=1
        # print head
        for i in xrange(start,len(lines),interval):
            result = OrderedDict()
            print i, min(i+interval,len(lines))
            data=lines[i:min(i+interval,len(lines))]
            data.insert(0,head)
            result.update({"hotel_info": data})
            result.update({"review_info": [head2]})
            # result.update({"reviewer_info": [head3]})
            save_data(output_template.format(i), result)
            count+=1
Пример #33
0
    def shard_reviewer(self, interval, path, output_format):
        """Split the reviewer sheet of the workbook at *path* into shards.

        Loads 'reviewer_info' (round-tripped through JSON to normalise cell
        types) and writes slices of `interval` rows, each prefixed with the
        header row, to output_format.format(count).  Shards whose output
        file already exists are skipped, but `count` still advances so the
        numbering stays aligned with the slices.

        NOTE(review): Python 2 only (xrange, print statement).
        """
        xls = get_data(path)
        data = json.dumps(xls, default=self.json_serial)
        lines = json.loads(data, object_hook=json_util.object_hook)['reviewer_info']

        # header row preserved in every shard
        head=lines[0]
        start=1
        count=1
        # print head
        for i in xrange(start,len(lines),interval):
            output_file = output_format.format(count)
            result = OrderedDict()
            print i, min(i+interval,len(lines))
            if os.path.isfile(output_file):
                # shard already written -- skip it
                print True
                # if not os.path.isfile(output_format.format(count + 1)):
                #     data=lines[i:min(i+interval,len(lines))]
                #     data.insert(0,head)
                #     result.update({"reviewer_info": data})
                #     output_file = output_format.format(str(count)+"_x")
                #     save_data(output_file, result)
            else:
                data=lines[i:min(i+interval,len(lines))]
                data.insert(0,head)
                result.update({"reviewer_info": data})
                save_data(output_file, result)
            count+=1
Пример #34
0
def test_complex_merged_cells():
    """A sheet with many overlapping merged regions expands correctly."""
    book = get_data(
        get_fixture("complex-merged-cells-sheet.xls"),
        detect_merged_cells=True,
        library="pyexcel-xls")
    expected = [
        [1, 1, 2, 3, 15, 16, 22, 22, 24, 24],
        [1, 1, 4, 5, 15, 17, 22, 22, 24, 24],
        [6, 7, 8, 9, 15, 18, 22, 22, 24, 24],
        [10, 11, 11, 12, 19, 19, 23, 23, 24, 24],
        [13, 11, 11, 14, 20, 20, 23, 23, 24, 24],
        [21, 21, 21, 21, 21, 21, 23, 23, 24, 24],
    ] + [[25] * 10 for _ in range(2)]
    eq_(book['Sheet1'], expected)
Пример #35
0
Файл: views.py Проект: grv07/QnA
def save_XLS_to_OBJECTIVE(request):
	"""Import objective questions from an uploaded xls into the database.

	Verifies the requesting user's hash, saves the uploaded 'figure' file
	to disk, and re-reads it with get_data.  The first row must equal
	OBJECTIVE_FILE_COLS; every following row is turned into an
	ObjectiveQuestionSerializer (sub-category looked up by name, level,
	explanation, correct answer, HTML content).  Questions are saved only
	if EVERY row validates; otherwise a 400 response is returned.

	NOTE(review): Python 2 only (print statement, dict.has_key).
	`total_entries = len(data)` counts sheets if get_data returns a dict
	of sheets, but rows are read as data[0]/data[1:] -- confirm which
	get_data this is.
	"""
	# _level_dict = {'medium': 'M', 'easy': 'E', 'hard': 'H'}
	if verify_user_hash(request.data.get('user'), request.data.get('hash')):
		print '----'
		f = request.data['figure']

		# persist the uploaded file chunk by chunk
		with open('mcq_read_now.xls', 'wb+') as destination:
			for chunk in f.chunks():
				destination.write(chunk)

		data = get_data("mcq_read_now.xls")
		total_entries = len(data)
		temp_data = data[0]
		# Check if columns of xls file provided are not tampered/changed.
		if temp_data == OBJECTIVE_FILE_COLS:
			try:
				# This dict contains raw-way data with specified keys from temp_data or xls keys
				# data_dict = collections.OrderedDict({})
				correct_serializers_list = []	
				d = {}  # cache: sub-category name -> SubCategory object
				for i in data[1:]:
					if BLANK_HTML in i[4]:
						if not d.has_key(i[0]):
							try:
								d[i[0]] = SubCategory.objects.get(sub_category_name=i[0])
							except SubCategory.DoesNotExist as e:
								return Response({ "errors" : "Wrong sub-category specified." } , status = status.HTTP_400_BAD_REQUEST)

						temp_dict = { 'sub_category':d[i[0]].id, 'level':i[1], 'explanation':'', 'correct':'', 'content':i[4], 'que_type': 'objective' }
						temp_dict['explanation'] = str(i[2])
						temp_dict['correct'] = str(i[3])
						serializer = ObjectiveQuestionSerializer(data = temp_dict)
						if serializer.is_valid():
							correct_serializers_list.append(serializer)
						else:
							return Response({ "errors" : "There is some error while saving your questions. Correct the format." } , status = status.HTTP_400_BAD_REQUEST)
					else:
						return Response({ "errors" : "There is some error while saving your questions. Correct the format." } , status = status.HTTP_400_BAD_REQUEST)
				# all-or-nothing: save only when every row validated
				if len(correct_serializers_list) == total_entries - 1:
					for serializer in correct_serializers_list:
						serializer.save()
				return Response({}, status = status.HTTP_200_OK)
			except Exception as e:
				print e.args
		return Response({ "errors" : "There is some error while saving your questions. Correct the format." } , status = status.HTTP_400_BAD_REQUEST)
	else:
		return Response({'errors': 'Corrupted User.'}, status=status.HTTP_404_NOT_FOUND)
Пример #36
0
def save_test_private_access_from_xls(f, quiz_id):
    """Grant private test access to every user listed in an uploaded xls.

    The upload *f* is saved to disk, its header row dropped, and for each
    (name, email) row an InvitedUser is fetched or created, attached to
    quiz *quiz_id*, and e-mailed an access link.  Returns True on success,
    False on any failure.
    """
    with open('test_private_access.xls', 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    rows = get_data("test_private_access.xls")[1:]
    try:
        quiz = Quiz.objects.get(id=quiz_id)
        for row in rows:
            name, email = row[0], row[1]
            invited, _created = InvitedUser.objects.get_or_create(user_name=name, user_email=email)
            invited.add_inviteduser(quiz_id)
            html = PRIVATE_TEST_ACCESS_HTML.format(quiz_name=quiz.title, test_link=quiz.url)
            send_mail(html, email, subject="Test access granted")
        return True
    except Exception as e:
        return False
def fetchingdata(path, c):
	"""Insert 2013 crimes-against-women totals into TOTALCRIME.

	Reads crimes_count_women_01_12.xls from *path* and inserts one
	(state, crime type, 2013, count) row per spreadsheet row via cursor
	*c*.  Unlike its sibling loaders there is no header-skip flag here.

	NOTE(review): Python 2 only (backtick repr syntax, print statement);
	string-built SQL is injection-prone.  c_id/g_id01/g_id11 are set but
	never used -- likely left over from a copied loader.
	"""
	filename = 'crimes_count_women_01_12.xls'
	ra = ex.get_data(path + '/' + filename)

	no = 0
	c_id = 353
	g_id01 = 1
	g_id11 = 37

	for row in ra:
		state = row[0].encode('utf-8')
		type_crime = row[1].encode('utf-8')
		
		# `expr` is Python 2 shorthand for repr(expr)
		query = 'INSERT INTO TOTALCRIME VALUES(\'' + state + '\',\'' + type_crime + '\',' + `2013` + ',' + `row[2]` + ')'
		c.execute(query)
			
		no += 1

	print no , "Rows inserted."
Пример #38
0
    def process_file(self, doc):
        """Load a product spreadsheet attached to a bot document message.

        Downloads the file through the bot API; csv content is parsed with
        pandas, with pyexcel's get_data as a fallback for xls/xlsx (first
        sheet, row 0 as headers).  Known column names (English and Russian)
        are mapped onto canonical keys, the 'active' flag is normalised to
        '1'/'0', and the resulting item dicts are stored in
        self.ctx.tmpdata.  Other file types are ignored.

        NOTE(review): Python 2 only -- `excel_data.values()[0]` subscripts
        a dict view, and u'' literals / StringIO(content) imply Python 2
        str handling.
        """
        fid = doc.document.file_id
        file_info = self.ctx.bot.bot.get_file(fid)
        file_format = file_info.file_path.split('.')[-1]
        if file_format in ['csv', 'xls', 'xlsx']:
            content = self.ctx.bot.bot.download_file(file_info.file_path)
            io = StringIO(content)
            try:
                df = pd.read_csv(io)
            except:
                # not csv: fall back to excel -- first sheet, header row 0
                excel_data = get_data(io)
                _keys = excel_data.values()[0][0]
                _values = excel_data.values()[0][1:]
                _items = [dict(zip(_keys, rec)) for rec in _values]
                df = pd.DataFrame(_items)

            # case-insensitive lookup of the source columns
            df_keys = {k.lower(): k for k in df.to_dict().keys()}
            data = pd.DataFrame()

            # canonical field -> accepted source column names
            mapping = {
                'id': ['id', 'product_id'],
                'active': ['active', 'visible', u'активно'],
                'cat': ['category', u'раздел 1', u'категория'],
                'name': [u'наименование', 'name'],
                'desc': [u'описание', 'description', 'description(html)'],
                'price': ['price', u'цена'],
                'img': ['img_url', u'изображение', u'ссылка на изображение']
            }

            for k, values in mapping.items():
                for col_name in values:
                    if col_name in df_keys:
                        data[k] = df[df_keys[col_name]]

            # normalise the active flag to '1'/'0'
            data['active'] = data['active'].map(lambda x: '1' if x in [1, 'y'] else '0')
            items = data.T.to_dict().values()
            if len(items) == 0:
                raise Exception("no items added")

            self.ctx.tmpdata = items
        else:
            pass
def fetchingdata(path, c):
	filename = 'police_2001_strength_xls.xls'
	ra = ex.get_data(path + '/' + filename)
	flag = False # leaving the headers
	p_id = 1
	pol_id = 1
	no = 0

	for row in ra:
		if flag == False:
			flag = True
		else:
			state = row[0].encode('utf-8')
			strength = int(row[1])
			per100 = float(row[2])
			year = 2001			
			query = 'INSERT INTO POLICE VALUES(\'' + str(pol_id) + '\',\'' + str(p_id) + '\',\''+ state + '\',' + `year` + ',' + `per100` +  ',' + `strength` +')'
			c.execute(query)
			no += 1
			p_id += 1
			pol_id += 1
	print no , "Rows inserted."
def fetchingdata(path, c):
	filename = 'crimes_count_women_01_12.xls'
	ra = ex.get_data(path + '/' + filename)

	no = 0
	c_id = 353
	g_id01 = 1
	g_id11 = 37

	for row in ra:
		state = row[0].encode('utf-8')
		type_crime = row[1].encode('utf-8')
		year_arr = range(2001, 2014)
		counter = 0

		for i in range(2, 15):
			query = 'INSERT INTO TOTALCRIME VALUES(\'' + state + '\',\'' + type_crime + '\',' + `year_arr[counter]` + ',' + `row[i]` + ')'
			counter = counter + 1
			c.execute(query)
			
		no += 12

	print no , "Rows inserted."
Пример #41
0
def test_exploration():
    """Merged-cell values must be replicated into every cell of the merge."""
    data = get_data(
        get_fixture("merged-sheet-exploration.xls"),
        detect_merged_cells=True,
        library="pyexcel-xls")
    # a 1x6 horizontal merge followed by a 9x1 vertical merge
    eq_(data['Sheet1'], [[1] * 6] + [[2]] * 9)
    # a vertical merge of 3s crossed by a horizontal merge of 4s
    eq_(data['Sheet2'], [[3]] * 3 + [[3, 4, 4, 4, 4, 4, 4]] + [[3]] * 3)
    # merges that do not start at the sheet origin keep their padding
    eq_(data['Sheet3'],
        [['', '', '', '', '', 2, 2, 2]] + [[]] * 3 + [['', '', '', 5]] * 5)
def fetchingdata(path, c):
	filename = 'population_census2011.xls'
	ra = ex.get_data(path + '/' + filename)
	flag = False # leaving the headers
	p_id = 36
	no = 0

	for row in ra:
		if flag == False:
			flag = True
		else:
			state = row[0].encode('utf-8')
			population = int(row[1])
			change_factor = float(row[2])
			sex_ratio = int(row[3])
			year = 2011
			try:
				query = 'INSERT INTO POPULATION VALUES(\'' + str(p_id) + '\',\'' + state + '\',' + `year` + ',' + `sex_ratio` + ',' + `population` + ',' + `change_factor` + ')'
				c.execute(query)
			except:
				print state, p_id
			no += 1
			p_id += 1
	print no , "Rows inserted."
Пример #43
0
def read_xls_file():
    """Load source.xls and return the rows of its 'Sheet1' (None if absent)."""
    workbook = get_data(r"source.xls")
    return workbook.get('Sheet1')
Пример #44
0
def load_from_xls(filepath, *,
                  names_sheet='Nom',
                  tests_sheets=('B1', 'B2', 'B3', 'B4'),
                  tests_dates=((2015, 9), (2015, 11), (2016, 2), (2016, 4), (2016, 6)),
                  tests_stopwords=('', 'Total SSFL'),
                  name_pos=(3, 1),
                  test_pos=(0, 2),
                  max_decal=1,
                  comp_decal=2,
                  norm_weight=20):
    """Load per-student test results from an .xls gradebook into a DataFrame.

    filepath        -- workbook path, read with pyexcel_xls.get_data
    names_sheet     -- sheet listing the student names
    tests_sheets    -- one sheet per grading period
    tests_dates     -- (year, month) boundaries of the periods; needs one
                       more entry than tests_sheets (end of last period)
    tests_stopwords -- header cells marking the end of the test columns
    name_pos        -- (first row, column) of student names in names_sheet
    test_pos        -- (row, first column) of test names in each test sheet
    max_decal       -- row offset from test names to the max-points row
    comp_decal      -- row offset from test names to the skill-code row
    norm_weight     -- scale used for the 'weighted_result' column

    Returns a DataFrame with one row per (student, test) including raw,
    weighted and quartile-normalized results.

    Changes vs. original: tuple defaults instead of mutable lists;
    float('nan') instead of the removed pandas.np alias.
    """
    results = []

    data = pyexcel_xls.get_data(filepath)

    # collect student names, skipping short/empty rows
    students = []
    for line in data[names_sheet][name_pos[0]:]:
        try:
            name = line[name_pos[1]]
            if name:
                students.append(name)
        except IndexError:
            pass

    for sheet in tests_sheets:
        # test names run until the first stopword cell
        _ = data[sheet][test_pos[0]][test_pos[1]:]
        tests = list(takewhile(lambda s: s not in tests_stopwords, _))
        maxs = data[sheet][test_pos[0] + max_decal][test_pos[1]:test_pos[1] + len(tests)]
        comps = data[sheet][test_pos[0] + comp_decal][test_pos[1]:test_pos[1] + len(tests)]
        for student_i, student in enumerate(students):
            if len(tests) == 0:
                continue

            # spread the sheet's tests evenly over its date interval
            year, month = tests_dates[tests_sheets.index(sheet)]
            tests_start = datetime.datetime(year=year, month=month, day=1)

            year, month = tests_dates[tests_sheets.index(sheet) + 1]
            tests_end = datetime.datetime(year=year, month=month, day=1)

            tests_interval = (tests_end - tests_start).days // len(tests)

            for test_i, test in enumerate(tests):
                row = name_pos[0] + student_i
                column = test_pos[1] + test_i
                value = data[sheet][row][column]

                # empty cell (but keep a literal 0) -> missing result
                if value != 0 and not value:
                    value = float('nan')

                test_date = tests_start + datetime.timedelta(days=test_i * tests_interval)

                result = {
                    'name': student,
                    'period': sheet,
                    'date': test_date,
                    'code': '%s/%02d/%s' % (sheet, test_i + 1, comps[test_i]),
                    'test': '%s' % test,
                    'weight': maxs[test_i],
                    'skill': comps[test_i],
                    'result': value
                }

                results.append(result)

    df = pandas.DataFrame.from_dict(results)
    df['weighted_result'] = df['result'] / df['weight'] * norm_weight

    # per-date quartiles, then normalize each result by its date's IQR
    tests = df.dropna().groupby('date')['result'].describe().unstack()[['mean', '25%', '50%', '75%']]
    ndf = df.merge(tests, how='outer', left_on=['date'], right_index=True)
    ndf['range'] = (ndf['75%'] - ndf['25%'])
    ndf['normalized_result'] = (ndf['result'] - ndf['50%']) / ndf['range']

    # degenerate IQR -> 0; missing raw result stays NaN
    ndf['normalized_result'] = ndf['normalized_result'].where(ndf['range'] > 0, 0)
    ndf['normalized_result'] = ndf['normalized_result'].where(ndf['result'].notnull(), float('nan'))
    ndf = ndf.drop('range', axis=1)

    return ndf
Пример #45
0
def read_xls_file():
    """Read write1.xls and return the contents of its 'Sheet1' (None if absent)."""
    return get_data(r"write1.xls").get('Sheet1')
Пример #46
0
 def get_tps_graph_data(self):
     """Read Report3.xls from the result directory and store its Sheet1
     rows under summary_data['tps_graph_data']."""
     report_path = self.result_dir + "\\An_Report1\\Report3.xls"
     workbook = get_data(report_path)
     self.summary_data['tps_graph_data'] = workbook['Sheet1']
Пример #47
0
Файл: views.py Проект: grv07/QnA
def save_XLS_to_MCQ(request):
	"""Parse an uploaded .xls of MCQ rows and hand them to create_mcq().

	The first spreadsheet row supplies the column names; every later row
	becomes one MCQ dict.  'optionN' columns are folded into a single
	optioncontent dict; category / sub-category names are resolved to ids
	through the ORM, memoized per upload in local dicts.

	Returns create_mcq()'s response on success, 400 on an unknown
	sub-category, 404 on a bad user hash.

	Change vs. original: dict.has_key() (removed in Python 3) replaced by
	the 'in' operator; unused total_entries dropped.
	"""
	if verify_user_hash(request.data.get('user'), request.data.get('hash')):
		logger.info('under mcq.save_XLS_to_MCQ')
		# Example of one parsed entry:
		# {u'sub_category': u'5', u'answer_order': u'random',
		#  u'explanation': u'eeeeeeeeee', u'level': u'easy',
		#  u'correctoption': u'1', u'content': u'eeeeeeeeee',
		#  u'optioncontent': {u'1': u'eeeeeeeeee', u'2': u'eeeeeeeeeee'}}
		f = request.data['figure']

		# spool the uploaded file to disk so the xls reader can open it
		with open('mcq_read_now.xls', 'wb+') as destination:
			for chunk in f.chunks():
				destination.write(chunk)

		data = get_data("mcq_read_now.xls")

		temp_data = data[0]  # header row: column names, aligned with each data row
		_dict_mcq_keys = ['category', 'sub_category', 'que_type', 'level', 'explanation', 'option_display_order_random', 'correctoption', 'content', 'ideal_time', 'problem_type']
		# template dict with every expected key preset to ''
		data_dict = OrderedDict({})
		category_dict = {}      # category name -> id cache
		sub_category_dict = {}  # sub-category name -> id cache
		option_check = ['option1', 'option2', 'option3', 'option4', 'option5', 'option6']

		data_list = []
		for row_name in _dict_mcq_keys:
			data_dict.update({row_name : ''})

		# one dict per spreadsheet row, skipping the header
		for i, list_data in enumerate(data[1:]):
			data_list.append(data_dict.copy())
			optioncontent = {}
			for j, mcq_data in enumerate(list_data):
				mcq_data = ascii_safe(mcq_data)
				if temp_data[j] == 'correctoption':
					data_list[i][temp_data[j]] = str(mcq_data)
				# option columns accumulate into one optioncontent dict,
				# keyed by the 1-based option number
				elif temp_data[j] in option_check:
					if mcq_data:
						optioncontent[option_check.index(temp_data[j])+1] = str(mcq_data)
						data_list[i]['optioncontent'] = optioncontent
				# resolve category name -> id, caching per name; an unknown
				# category is logged and stored as None
				elif temp_data[j] == 'category':
					if mcq_data in category_dict:
						category_id = category_dict.get(mcq_data)
					else:
						try:
							category_id = Category.objects.get(category_name = mcq_data).id
						except Exception as e:
							logger.error('under mcq.save_XLS_to_MCQ '+str(e.args))
							category_id = None
						category_dict[mcq_data] = category_id
					data_list[i][temp_data[j]] = str(category_id)
				# same caching pattern; an unknown sub-category aborts the upload
				elif temp_data[j] == 'sub_category':
					try:
						if mcq_data in sub_category_dict:
							sub_category_id = sub_category_dict.get(mcq_data)
						else:
							sub_category_id = SubCategory.objects.get(sub_category_name = mcq_data).id
							sub_category_dict[mcq_data] = sub_category_id
						data_list[i][temp_data[j]] = str(sub_category_id)
					except SubCategory.DoesNotExist as e:
						logger.error('under mcq.save_XLS_to_MCQ wrong subcategory specified '+str(e.args))
						return Response({ "errors" : "Wrong sub-category specified." } , status = status.HTTP_400_BAD_REQUEST)
				else:
					data_list[i][temp_data[j]] = str(mcq_data)

		return create_mcq(request, data_list)
	else:
		logger.error('under quiz.save_XLS_to_MCQ wrong hash')
		return Response({'errors': 'Corrupted User.'}, status=status.HTTP_404_NOT_FOUND)
Пример #48
0
from pyexcel_xls import get_data
import csv
import sys


def convert_row(row):
  """Normalise one KOATUU row into a (code, code_type, name) unicode tuple."""
  raw_code = row[0]
  try:
    # numeric codes format without a decimal point
    code = u"%d" % raw_code
  except TypeError:
    # code was stored as text already
    code = u"%s" % raw_code
  return code, u"%s" % row[1], u"%s" % row[2]

if __name__ == '__main__':
  # Usage: python <script> <input.xls> <output.csv>
  # NOTE(review): Python 2 script (print statements, csv opened in 'wb').
  
  filename = sys.argv[1]
#'./data/koatuu/KOATUU_DBF_122015.xls'
  output_filename = sys.argv[2]
#'./data/koatuu_122015.csv'
  print 'Converting xls to csv...'
  data = get_data(filename)
  # the KOATUU workbook keeps everything on a single sheet
  koatuu = data['Sheet1']
  koatuu = map(convert_row, koatuu)
  with open(output_filename, 'wb') as csvfile:
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in koatuu:
      # rows are unicode tuples; Python 2's csv writer needs encoded bytes
      writer.writerow([s.encode("utf-8") for s in row])
  print 'Converted %s to %s' % (filename, output_filename)
Пример #49
0
 def parse(self, path):
     return get_data(path)
Пример #50
0
import json 

from pyexcel_xls import get_data

# Build a {age_range: {feature: r}} lookup from the correlation workbook
# and dump it to db.json.
# The json round-trip presumably normalizes pyexcel's container/value
# types to plain JSON ones -- TODO confirm.
data = json.loads(json.dumps(get_data("../data/fb20.age_categories.gender_adjusted.rmatrix.top100s.xls")))
db = {}

# NOTE(review): Python 2 code (dict.iteritems, json.dump to a 'wb' file).
for age_range,sheet in data.iteritems():
	db[age_range] = {}
	#Format of item in each list -- (feature r p feature r p)
	# columns 0-2 and 3-5 each hold a (feature, r, p) triple; keep feature -> r
	for item in sheet:
		db[age_range][item[0]] = item[1]
		db[age_range][item[3]] = item[4]

json.dump(db,open('../data/db.json','wb'))
Пример #51
0
__author__ = 'bhushan'

from pyexcel_xls import get_data
from pprint import pprint

# Load the CIS CentOS 7 benchmark workbook and print its 'Level 1' sheet.
data = get_data("CIS_CentOS_Linux_7_Benchmark_v1.1.0.xls")
pprint(data['Level 1'])
Пример #52
0
def test_simple_hidden_sheets():
    """Hidden rows/columns are dropped when skip_hidden_row_and_column is set."""
    fixture = os.path.join("tests", "fixtures", "hidden.xls")
    sheet = get_data(fixture, skip_hidden_row_and_column=True)['Sheet1']
    eq_(sheet, [[1, 3], [7, 9]])
Пример #53
0
def test_complex_hidden_sheets():
    """Hidden rows/columns are skipped even with several hidden ranges."""
    fixture = os.path.join("tests", "fixtures", "complex_hidden_sheets.xls")
    sheet = get_data(fixture, skip_hidden_row_and_column=True)['Sheet1']
    expected = [[1, 3, 5, 7, 9], [31, 33, 35, 37, 39], [61, 63, 65, 67]]
    eq_(sheet, expected)
Пример #54
0
from PIL import Image
from PIL import ImageFont, ImageDraw, ImageOps
from pyexcel_xls import get_data
import json
import shutil

# Working directories for customer photos, receipts and generated passes.
# NOTE(review): Python 2 script -- the backslashes below are unescaped
# (e.g. "\U", "\P"); under Python 3 "\U..." is a syntax error.  Raw
# strings or forward slashes would be safer if this is ever ported.
photosdir = "C:\Users\Cityflo1\pil-image-overlay\Photos"
receiptsdir = "C:\Users\Cityflo1\pil-image-overlay\Receipt"
passdir = "C:\Users\Cityflo1\pil-image-overlay\Pass"

destination = "C:\Users\Cityflo1\pil-image-overlay"


# customer data workbook; each row of Sheet1 is one customer record
details = get_data("details.xls")

for customer_detail in details["Sheet1"]:
        if "commute" in customer_detail[9]:
            im=Image.open("2.png")
            lm=Image.open("receipt.png")
        elif "shuttle" in customer_detail[9]:
            im=Image.open("3.png")
            lm=Image.open("receipt.png")
	
	shutil.move(photosdir + "\\" + str(customer_detail[8]), destination + "\\" + str(customer_detail[8]))

	f = ImageFont.truetype("avantgarde.otf",14)

	name_txt=Image.new('L', (500,500))
	name_block = ImageDraw.Draw(name_txt)
	name_block.text( (15, 38), str(customer_detail[0]),  font=f, fill=255)