Exemplo n.º 1
0
def test_tree():
    '''Build a decision tree from a small lending table and check four predictions.

    The best split feature and the resulting tree are printed before the
    samples are classified.
    '''
    text = '''
  年龄, 工作, 已婚,   信用, 可以借贷?
  青年, no ,  no ,   一般   , 不给
  青年, no ,  no ,   好    , 不给
  青年, yes,  no ,   好    , 给
  青年, yes,  yes,   一般   , 给
  青年, no ,  no ,   一般   , 不给
  中年, no ,  no ,   一般   , 不给
  中年, no ,  no ,   好    , 不给
  中年, yes,  yes,   好    , 给
  中年, no ,  yes,   非常好 , 给
  中年, no ,  yes,   非常好 , 给
  老年, no ,  yes,   非常好 , 给
  老年, no ,  yes,   好    ,  给
  老年, yes,  no ,   好    ,  给
  老年, yes,  no ,   非常好 , 给
  老年, no ,  no ,   一般   , 不给
  '''
    data, title = datamatrix(text)
    dt = DecisionTree(data, labels=title)
    n = dt.best_split_feature(data)
    tree = dt.decision_tree
    print(n)
    puts(tree)

    # (sample, expected answer) pairs.
    cases = (
        (['青年', 'no', 'no', '好'], '不给'),
        (['青年', 'no', 'yes', '非常好'], '给'),
        (['老年', 'yes', 'yes', '一般'], '给'),
        (['老年', 'no', 'no', '好'], '不给'),
    )
    # Every sample is classified before the first comparison runs.
    predictions = [dt.classify(sample, decision_tree=tree) for sample, _ in cases]
    for predicted, (_, expected) in zip(predictions, cases):
        predicted | should.eq(expected)
Exemplo n.º 2
0
def brute_force_params(run_func, logger, **params_range):
  '''Run ``run_func`` once for every combination of the given parameter values.

  Each keyword argument maps a parameter name to an iterable of candidate
  values, e.g. ``range(0, 250, 10)``.  ``itertools.product`` enumerates all
  combinations and ``run_func`` is called with each one as keyword arguments.

  Args:
    run_func: callable taking the parameter names as keyword arguments.  Its
      result is logged with the ``:.3%`` format, so it has to be a number.
    logger: object with a ``debug`` method; receives one progress line per
      run and the complete result list at the end.
    **params_range: parameter name -> iterable of candidate values.

  Returns:
    A list of ``[params, result]`` pairs in execution order, where ``params``
    is a tuple of ``(name, value)`` pairs.
  '''
  from itertools import product
  import time
  # perf_counter is monotonic: the reported durations cannot jump or turn
  # negative when the system clock is adjusted during a long run.
  total_start = time.perf_counter()
  total = []
  params_product = list(product(*[[(k, elem) for elem in v] for k, v in params_range.items()]))
  n_variations = len(params_product)
  puts('params prepared, {} variantions'.format(n_variations))
  template = ('get <{result:.3%}> by: {params} \n'
              '    [current {loop_cost:.1f}s / total {total_cost:.1f}s] {percent:.2%}')
  for i, params in enumerate(params_product, 1):
    loop_start = time.perf_counter()

    result = run_func(**dict(params))
    total.append([params, result])
    now = time.perf_counter()
    # Explicit arguments instead of format_map(vars()): the log line no
    # longer depends on the names of the local variables.
    info = template.format(result=result,
                           params=params,
                           loop_cost=now - loop_start,
                           total_cost=now - total_start,
                           percent=i / n_variations)
    logger.debug(info)

  puts('all done!')
  logger.debug(total)
  return total
Exemplo n.º 3
0
def test_word_filler_render():
  '''Detect the fields of the Word template, then render it from an inline info text.'''
  t1 = os.getcwd() + '/test/test_templates/test_{{name}}_面积计算表.doc'
  t2 = os.getcwd() + '/test/test_templates/test_no_field_面积计算表.doc'  # not used below
  from infotext import InfoText

  text = '''
    单位名称: 测试单位
    name: 测试单位name
    项目名称: 测试项目
    项目编号: 2015-项目编号-001
    面积90: 12345.600
    面积80: 12345.300
    地籍号: 1234567890010010000
    四至: 测试路1;测试街2;测试路3;测试街4
    土地坐落: 测试路以东,测试街以南
    area: 1000
    已设定值: value
  '''

  info = InfoText.from_string(text)

  def new_filler():
    # A fresh Filler on the same template and output folder for every step.
    return Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')

  for unique in (False, True):
    filler = new_filler()
    filler.detect_required_fields(unique=unique) | puts()

  filler = new_filler()
  filler.render(info=info)
Exemplo n.º 4
0
def test_tree():
  '''Grow a decision tree from a small lending table and verify four answers.

  Prints the best split feature and the tree, then classifies the samples.
  '''
  text = '''
  年龄, 工作, 已婚,   信用, 可以借贷?
  青年, no ,  no ,   一般   , 不给
  青年, no ,  no ,   好    , 不给
  青年, yes,  no ,   好    , 给
  青年, yes,  yes,   一般   , 给
  青年, no ,  no ,   一般   , 不给
  中年, no ,  no ,   一般   , 不给
  中年, no ,  no ,   好    , 不给
  中年, yes,  yes,   好    , 给
  中年, no ,  yes,   非常好 , 给
  中年, no ,  yes,   非常好 , 给
  老年, no ,  yes,   非常好 , 给
  老年, no ,  yes,   好    ,  给
  老年, yes,  no ,   好    ,  给
  老年, yes,  no ,   非常好 , 给
  老年, no ,  no ,   一般   , 不给
  '''
  data, title = datamatrix(text)
  dt = DecisionTree(data, labels=title)
  n = dt.best_split_feature(data)
  tree = dt.decision_tree
  print(n)
  puts(tree)

  samples = [
    ['青年', 'no', 'no', '好'],
    ['青年', 'no', 'yes', '非常好'],
    ['老年', 'yes', 'yes', '一般'],
    ['老年', 'no', 'no', '好'],
  ]
  expected = ['不给', '给', '给', '不给']

  # All samples are classified first; the comparisons follow afterwards.
  answers = [dt.classify(sample, decision_tree=tree) for sample in samples]
  for answer, wanted in zip(answers, expected):
    answer | should.eq(wanted)
Exemplo n.º 5
0
def brute_force_params(run_func, logger, **params_range):
    '''Try every possible combination of the given parameter values.

    Each keyword argument is an iterable of candidate values for that
    parameter, e.g. ``range(0, 250, 10)``.  ``itertools.product`` is used to
    walk through all combinations; ``run_func`` is called once per
    combination with the values passed as keyword arguments.

    Args:
        run_func: callable taking the parameter names as keyword arguments.
            Its result is logged with the ``:.3%`` format, so it has to be
            a number.
        logger: object with a ``debug`` method; receives one progress line
            per run and the complete result list at the end.
        **params_range: parameter name -> iterable of candidate values.

    Returns:
        A list of ``[params, result]`` pairs in execution order, where
        ``params`` is a tuple of ``(name, value)`` pairs.
    '''
    from itertools import product
    import time
    # Monotonic clock: elapsed times stay correct even if the system clock
    # is changed while the search is running.
    total_start = time.perf_counter()
    total = []
    params_product = list(
        product(*[[(k, elem) for elem in v] for k, v in params_range.items()]))
    n_variations = len(params_product)
    puts('params prepared, {} variantions'.format(n_variations))
    template = (
        'get <{result:.3%}> by: {params} \n'
        '    [current {loop_cost:.1f}s / total {total_cost:.1f}s] {percent:.2%}'
    )
    for i, params in enumerate(params_product, 1):
        loop_start = time.perf_counter()

        result = run_func(**dict(params))
        total.append([params, result])
        now = time.perf_counter()
        # Values are passed explicitly rather than through vars(), so the
        # message does not silently depend on local variable names.
        info = template.format(
            result=result,
            params=params,
            loop_cost=now - loop_start,
            total_cost=now - total_start,
            percent=i / n_variations,
        )
        logger.debug(info)

    puts('all done!')
    logger.debug(total)
    return total
Exemplo n.º 6
0
def test_intepolate_polyline_dx():
  '''Densify the selected polylines and split them into polyline fragments.'''
  cad = AutoCAD()
  spacing = 0.2  # 0.3 is the alternative value that was tried
  options = dict(distance=spacing, delete_original=True, break_at_vertexes=True)
  for pl in cad.selecting():
    # The call returns a pair; neither item is needed afterwards.
    interpl, report = cad.interpolate_polyline(pl, **options)
    puts('多段线加密 分割为多段线的碎片')
Exemplo n.º 7
0
def test_info_nested_by_yaml_load():
  '''Load test/nested.inf with InfoText.from_yaml and print it in three forms.'''
  info = InfoText.from_yaml(os.getcwd() + '/test/nested.inf')
  separator = '----'
  puts(info)
  print(separator)
  puts(info.content)
  print(separator)
  # Finally the content serialised back to YAML.
  print(yaml.dump(info.content))
Exemplo n.º 8
0
def test_trim_result():
  '''Turn the self-trained result file into a submission CSV.

  Rows marked 'classify failed' get a random digit, every other value is
  converted with int().  Statistics are printed before the file is written.
  '''
  path = 'result_by_self_train[28000].csv'
  digits = list(range(0, 10))
  result = [
    random.choice(digits) if r == 'classify failed' else int(r)
    for _, r, _ in load_csv(path, sample=0)
  ]
  puts(statistic(result))
  # One single-column row per prediction.
  write_csv('submit_' + path, rows=[[r] for r in result], headers=['val'])
Exemplo n.º 9
0
def test_check_result():
    '''Score every row of the self-trained result file and print the statistics.

    A row counts as '+' when str() of its second field equals the slice
    [1:2] of its third field, otherwise as '-'.
    '''
    path_result = 'result_by_self_train[28000].csv'
    path_benchmark = 'rf_benchmark.csv'  # benchmark file, not loaded here
    rows = load_csv(path_result, sample=0)
    score = ['+' if str(r) == b[1:2] else '-' for _, r, b in rows]
    puts(statistic(score))
Exemplo n.º 10
0
def test_intepolate_polyline():
  '''Densify the selected polylines and colour the results green.'''
  cad = AutoCAD()
  spacing = 0.2  # 0.3 is the alternative value that was tried
  for pl in cad.selecting():
    outcome = cad.interpolate_polyline(pl,
                                       distance=spacing,
                                       delete_original=True,
                                       break_at_vertexes=False)
    interpl, report = outcome
    interpl.color = 'green'
    puts('多段线加密')
Exemplo n.º 11
0
def test_check_result():
  '''Compare each result row with its reference field and print the tally.'''
  path_result = 'result_by_self_train[28000].csv'
  path_benchmark = 'rf_benchmark.csv'  # benchmark file, not loaded here

  def mark(r, b):
    # '+' when the stringified result equals the slice [1:2] of the third field.
    if str(r) == b[1:2]:
      return '+'
    return '-'

  score = [mark(r, b) for _, r, b in load_csv(path_result, sample=0)]
  puts(statistic(score))
Exemplo n.º 12
0
def test_trim_result():
    '''Write a submission CSV from the self-trained result file.

    Values equal to 'classify failed' are replaced by a random digit; all
    other values go through int().  Statistics are printed first.
    '''
    path = 'result_by_self_train[28000].csv'

    def as_digit(r):
        # Failed classifications are filled with a random guess from 0..9.
        if r == 'classify failed':
            return random.choice(list(range(0, 10)))
        return int(r)

    result = [as_digit(r) for _, r, _ in load_csv(path, sample=0)]
    puts(statistic(result))
    rows = [[r] for r in result]
    write_csv('submit_' + path, rows=rows, headers=['val'])
Exemplo n.º 13
0
def test_info_additional_keys():
  '''Check InfoText.get for present, missing and default-provided keys.'''
  from pyshould import should
  info = InfoText.from_yaml(os.getcwd() + '/test/nested.inf')
  puts(info.content)
  expectations = (
    ('a', 123),
    ('ErrorKey', None),
    ('foo', 'bar'),           # from key<default>
    ('current_year', 1404),   # key<default> contains this 1404
  )
  for key, expected in expectations:
    info.get(key) | should.eq(expected)
  # key<default> does not contain current_date
  print(info.get('current_date'))
Exemplo n.º 14
0
def test_redraw_vertex_sequence():
  '''Redraw the vertex order of each selected polyline and colour it yellow.'''
  cad = AutoCAD()
  # first=None is used here; -1 is the alternative that was tried.
  options = dict(first=None, hint=False, auto_reverse=True)
  for pl in cad.selecting():
    redrawpl = cad.redraw_vertex_sequence(pl, **options)
    redrawpl.color = 'yellow'
    puts('重绘顶点顺序')
Exemplo n.º 15
0
def test_de_intepolate_polyline():
  '''Thin out the selected polylines, keeping a result only if its area barely changes.'''
  cad = AutoCAD()
  for pl in cad.selecting():
    interpl, report = cad.de_interpolate_polyline(pl, threshold=3)
    area_drift = abs(interpl.area - pl.area)
    keep_simplified = area_drift < 30
    if not keep_simplified:
      # Area changed too much: throw the thinned polyline away.
      interpl.delete()
    else:
      # Accept the thinned polyline and drop the source one.
      interpl.color = 'green'
      pl.delete()
    puts('多段线抽稀 report=')
Exemplo n.º 16
0
def parse_packaging_served_detail_page(url, folder):
    """Download every project image referenced on one detail page.

    Args:
        url: address of the detail page to fetch.
        folder: raw folder name; it is passed through ``to_file_name`` and
            the directory is created when missing.  Each image is handed to
            ``save_image`` with the name ``<folder>_<index>`` (index
            starting at 1).
    """
    url | puts()
    r = requests.get(url)
    doc = PyQuery(r.text) | puts()

    folder = to_file_name(folder) | puts()
    # exist_ok replaces the exists()/mkdir() pair, which could still fail
    # when the directory appears between the check and the creation.
    os.makedirs(folder, exist_ok=True)
    for i, elem in enumerate(doc("#project-modules li.image>img"), 1):
        # A separate name keeps the page ``url`` parameter intact.
        image_url = PyQuery(elem).attr("src")
        if not image_url:
            # <img> without a src attribute: there is nothing to download.
            continue
        save_image(image_url, folder, folder + "_" + str(i))
Exemplo n.º 17
0
  def train(self):
    '''Grow ``self.n_trees`` decision trees, each on a random subset of features.

    For every tree ``self.n_features_per_tree`` distinct feature indexes are
    sampled and stored in ``self.features_index_for_trees``; the training
    data and labels are filtered with them and the resulting
    ``DecisionTree`` is appended to ``self.trees``.  When ``self.verbose``
    is set, a summary line and up to three trees are printed.
    '''
    for _ in range(self.n_trees):

      features_index = random.sample(range(self.n_features), self.n_features_per_tree)
      self.features_index_for_trees.append(features_index)
      data_filtered = self.filter_traindata(features_index)
      labels_filtered = self.filter_labels(features_index)
      tree = DecisionTree(data_filtered, labels=labels_filtered)
      self.trees.append(tree)
    if self.verbose:
      puts('train done: total {self.n_rows} rows, {self.n_trees} trees, each tree use {self.n_rows_per_tree} rows data')
      # Slicing instead of unpacking ``t1, t2, t3, *_, tn``: the preview no
      # longer raises ValueError when fewer than four trees exist.
      for preview in self.trees[:3]:
        pprint(preview.decision_tree)
      pprint('------------------------')
Exemplo n.º 18
0
def test_documasonry_generate():
  '''Generate documents from four test templates with an inline info text.'''
  template_tails = (
    '/test/test_templates/_test_{{项目名称}}-申请表.xls',
    '/test/test_templates/test_{{name}}_面积计算表.doc',
    '/test/test_templates/test_{{测试单位}}-宗地图.dwg',
    '/test/test_templates/test_no_field_面积计算表.doc',
  )
  # Every template lives below the current working directory.
  template_paths = [os.getcwd() + tail for tail in template_tails]
  masonry = Documasonry(output_path=os.getcwd() + '/test/test_output',
                        template_paths=template_paths)
  text = '''
         项目名称: test1
         单位名称: test2
         地籍号: 110123122
         name: sjgisdgd
         面积90: 124.1
         面积80: 234.2
         area: 124.2
         测试单位: testconm
         title: testtitle
         project: pro.
         date: 20124002
         ratio: 2000
         landcode: 235
         area80: 94923
         area90: 3257
         '''
  info = InfoText.from_string(text)
  masonry.generate(info=info, save=True, add_index=True) | puts()
Exemplo n.º 19
0
 def points_list_to_variant(self, coord):
   '''Flatten ``coord`` into floats and wrap them in a COM array of doubles.

   Raises:
     AttributeError: if the flattened sequence has an odd number of items.
   '''
   import pylon
   data = [float(x) for x in pylon.flatten(coord)] | pylon.puts()
   if len(data) % 2:
     raise AttributeError("point_to_variant: coord length must be even")
   variant_type = pythoncom.VT_ARRAY | pythoncom.VT_R8
   return win32com.client.VARIANT(variant_type, data)
Exemplo n.º 20
0
def save_image(url, folder, image_name=None):
    """Download ``url`` into ``folder`` unless the target file already exists.

    Args:
        url: address of the image.
        folder: directory the file is written to; it is not created here.
        image_name: optional base name.  When given it is passed through
            ``to_file_name`` and the text after the last ``.`` of ``url``
            is appended as extension; otherwise the last ``/``-separated
            segment of ``url`` is used as file name.

    Returns:
        None.  Nothing is written when the file exists or the server does
        not answer with status 200.
    """
    if not image_name:
        file_name = folder + "/" + url.split("/")[-1]
    else:
        ext = url.split(".")[-1]
        file_name = folder + "/" + to_file_name(image_name) + "." + ext

    if os.path.exists(file_name):
        puts("-- already exist file_name")
        return

    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            # Previously a failed download was still reported as saved.
            puts("-- download failed, HTTP status {}".format(r.status_code))
            return
        with open(file_name, "wb") as f:
            # Let urllib3 undo gzip/deflate while the raw stream is copied.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response holds on to its connection until it is closed.
        r.close()
    puts("save done! url >> file_name")
Exemplo n.º 21
0
def test_documasonry_detect_fields():
  '''Print the fields required by the four test templates.'''
  template_tails = (
    '/test/test_templates/_test_{{项目名称}}-申请表.xls',
    '/test/test_templates/test_{{name}}_面积计算表.doc',
    '/test/test_templates/test_{{测试单位}}-宗地图.dwg',
    '/test/test_templates/test_no_field_面积计算表.doc',
  )
  # Every template lives below the current working directory.
  template_paths = [os.getcwd() + tail for tail in template_tails]
  masonry = Documasonry(output_path=os.getcwd() + '/test/test_output',
                        template_paths=template_paths)
  masonry.detect_required_fields() | puts()
Exemplo n.º 22
0
  def save(self, info, close=True, prefix=''):
    '''Save the document under the evaluated template file name.

    The output name is ``prefix`` plus the template's base name with its
    fields evaluated against ``info``; it is stored in ``self.output_name``.
    An existing file at the target path is renamed to
    ``<name>.backup-<YYYYmmdd-HHMMSS><ext>`` first.

    Args:
      info: field values handed to ``evalute_field``.
      close: close the document after saving (default True).
      prefix: text put in front of the output file name.

    Raises:
      Any exception raised by ``self.document.SaveAs`` is propagated
      unchanged.
    '''
    self.output_name = prefix + evalute_field(os.path.basename(self.template_path), info)
    output_path = os.path.join(self.output_folder, self.output_name)
    # Use backslashes throughout.  The former '\\' -> '/' pass directly
    # before this one had no effect on the final string.
    output_path = output_path.replace('/', '\\')
    if os.path.exists(output_path):
      # Keep the previous output next to the new one under a timestamped name.
      root, ext = os.path.splitext(output_path)
      fix = time.strftime('.backup-%Y%m%d-%H%M%S')
      os.rename(output_path, root + fix + ext)

    # NOTE(review): the old handler re-raised with a bare ``raise``, leaving
    # the SaveDocumentError wrapper after it unreachable.  The effective
    # behaviour (original exception propagates) is kept; the dead code is gone.
    self.document.SaveAs(output_path)
    puts('save document done - output_path')

    if close:
      self.document.Close()
Exemplo n.º 23
0
def test_scan_entities():
  '''Scan the selected entities and print the resulting report line by line.

  For polylines the total area of closed lines and the total length of open
  lines are reported additionally.
  hole: treat the polylines as an outer boundary with inner holes; the area
  is then the largest area minus the other, smaller ones.
  '''
  scan_options = dict(hole=True, error_color=None, error_layer=None)
  report_lines = cad.scan_entities(selecting_entities, **scan_options)
  for line in report_lines:
    line | puts()
Exemplo n.º 24
0
    def train(self):
        '''Grow ``self.n_trees`` decision trees, each on a random feature subset.

        For every tree ``self.n_features_per_tree`` distinct feature indexes
        are sampled and stored in ``self.features_index_for_trees``; the
        training data and labels are filtered with them and the resulting
        ``DecisionTree`` is appended to ``self.trees``.  When
        ``self.verbose`` is set, a summary line and up to three trees are
        printed.
        '''
        for _ in range(self.n_trees):

            features_index = random.sample(range(self.n_features),
                                           self.n_features_per_tree)
            self.features_index_for_trees.append(features_index)
            data_filtered = self.filter_traindata(features_index)
            labels_filtered = self.filter_labels(features_index)
            tree = DecisionTree(data_filtered, labels=labels_filtered)
            self.trees.append(tree)
        if self.verbose:
            puts(
                'train done: total {self.n_rows} rows, {self.n_trees} trees, each tree use {self.n_rows_per_tree} rows data'
            )
            # Slicing instead of unpacking ``t1, t2, t3, *_, tn``: the
            # preview no longer raises ValueError with fewer than four trees.
            for preview in self.trees[:3]:
                pprint(preview.decision_tree)
            pprint('------------------------')
Exemplo n.º 25
0
def test_rebuild_arc_polyline():
  '''Turn densified polylines back into arc polylines.

  Works on the selected closed polylines; selected points, if any, are
  passed on as segment separators.
  '''
  cad = AutoCAD()
  polylines, points = [], []
  for en in cad.selecting():
    kind = en.entity_type
    if kind == 'Point':
      points.append(en)
    elif kind == 'Polyline' and en.closed:
      polylines.append(en)

  dist = 3
  report = '转为圆弧poly 原面积={:.4f} 新面积={:.4f} 相差={:.4f} ({:.4%})'

  for pl in polylines:
    separators = [(p.x, p.y) for p in points]
    arcpl = cad.rebuild_arc_polyline(pl, threshold=dist, segment_points=separators)
    arcpl.color = 'yellow'
    # Old area, new area, absolute and relative difference.
    puts(report.format(pl.area,
                       arcpl.area,
                       arcpl.area - pl.area,
                       (arcpl.area - pl.area) / pl.area))
Exemplo n.º 26
0
def parse_incredibal(path):
  '''Save the image of every <article> of a stored HTML page into "food".

  Args:
    path: path of the HTML file to read.
  '''
  # The with-block closes the file handle; it used to be left open.
  with open(path) as f:
    text = f.read()
  doc = PyQuery(text)

  for elem in doc('article'):
    elem = PyQuery(elem)

    # src of the image inside the article's figure, echoed through puts.
    url = elem('figure div img').attr('src') | puts()

    save_image(url, folder='food')
Exemplo n.º 27
0
def test_word_filler_detect_fields():
  '''Print the required fields of two Word templates, then render the first one.'''
  t1 = os.getcwd() + '/test/test_templates/test_{{name}}_面积计算表.doc'
  t2 = os.getcwd() + '/test/test_templates/test_no_field_面积计算表.doc'
  from infotext import InfoText
  yaml_info = InfoText.from_yaml(os.getcwd() + '/test/测试单位.inf')

  # Each detection runs on a freshly created Filler.
  for template in (t1, t2):
    for unique in (False, True):
      filler = Filler(template_path=template, output_folder=os.getcwd() + '/test/test_output')
      filler.detect_required_fields(unique=unique) | puts()

  filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
  filler.render(info=yaml_info)
Exemplo n.º 28
0
def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Args:
        dataSet: samples, one per row.  Assumed to be a 2-D numpy
            matrix/array compatible with ``initCentroids`` and
            ``euclDistance`` (TODO confirm against callers).
        k: number of clusters.

    Returns:
        ``(centroids, clusterAssment)``: the centroids produced by
        ``initCentroids`` and refined in place, and a ``numSamples x 2``
        matrix whose first column is the cluster index of each sample and
        whose second column is the squared distance to its centroid.
    """
    numSamples = dataSet.shape[0]
    # first column stores which cluster this sample belongs to,
    # second column stores the error between this sample and its centroid
    clusterAssment = mat(zeros((numSamples, 2)))
    clusterChanged = True

    ## step 1: init centroids
    centroids = initCentroids(dataSet, k)

    while clusterChanged:
        clusterChanged = False
        ## for each sample
        for i in range(numSamples):
            # Infinity instead of a fixed 100000.0: samples farther away
            # than that from every centroid were always put in cluster 0.
            minDist = float('inf')
            minIndex = 0
            ## step 2: find the centroid who is closest
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j

            ## step 3: update its cluster
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Refresh the row on every pass.  Writing it only on a change
            # left the error stale once centroids moved, and never stored
            # it for samples that start out nearest to centroid 0.
            clusterAssment[i, :] = minIndex, minDist ** 2

        ## step 4: update centroids
        for j in range(k):
            pointsInCluster = dataSet[nonzero(clusterAssment[:, 0].A == j)[0]]
            # An empty cluster keeps its centroid; the mean of zero rows
            # would turn it into NaN.
            if pointsInCluster.shape[0] > 0:
                centroids[j, :] = mean(pointsInCluster, axis=0)

    pylon.puts('Congratulations, cluster complete!')
    return centroids, clusterAssment
Exemplo n.º 29
0
def test_find_nearest_text():
  '''Colour cyan, for every non-black text, the black text nearest to it.'''
  cad = AutoCAD()
  entities = list(cad.selecting())
  numbers, names = [], []
  for text in entities:
    # Black texts are the names, everything else counts as a number.
    bucket = names if text.color == 'black' else numbers
    bucket.append(text)
  from Converter import SpaceCoordinate
  dist = SpaceCoordinate().distance2
  for number in numbers:
    near = min(names, key=lambda name: dist(name.mid_point, number.mid_point)) | puts()
    near.color = 'cyan'
Exemplo n.º 30
0
def parse_packaging_served(path):
    """Read a stored listing page and download the images of its projects.

    Every ``li>div`` element contributes a ``(title, image, detail)``
    triple; for each triple from index 16 on, the detail page below
    ``http://www.packagingserved.com`` is parsed into a folder named after
    the title.

    Args:
        path: path of the HTML file holding the listing page.
    """
    # The with-block closes the file handle; it used to be left open.
    with open(path) as f:
        text = f.read()
    doc = PyQuery(text)
    result = []
    for elem in doc("li>div"):
        elem = PyQuery(elem)
        cover = elem(".cover-img img.cover-img-standard")
        title = cover.attr("title")
        image = cover.attr("src")
        detail = elem(".cover-name a.cover-name-link").attr("href")
        result.append((title, image, detail))

    for i, (title, image, detail) in enumerate(result):
        # The first 16 entries are skipped by this hard-coded start index.
        if i < 16:
            continue
        puts("@ start parse folder i title detail")
        site = "http://www.packagingserved.com"
        parse_packaging_served_detail_page(url=site + detail, folder=title)
Exemplo n.º 31
0
def test_jinja_edge_cases():
  '''Render a template that slices a numeric field after casting it to a string.'''
  from infotext import InfoText

  # The template takes characters 12..14 of ``codes`` twice.
  template = '''
    {{(codes | string)[12:15]}}-01
    {{(codes | string)[12:15]}}-01
  '''
  text = '''
    codes: 1231234000050280000
    borders: 空地;空地;空地;空地
  '''

  info = InfoText.from_string(text)
  result = Template(template).render(**info.content)
  result | puts()
Exemplo n.º 32
0
def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot 2-D samples by cluster together with the cluster centroids.

    Returns 1 without plotting when the data is not two-dimensional or when
    ``k`` exceeds the number of available marker styles; otherwise shows the
    figure and returns None.
    """
    numSamples, dim = dataSet.shape
    if dim != 2:
        pylon.puts("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    sample_marks = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    centroid_marks = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    if k > len(sample_marks):
        pylon.puts("Sorry! Your k is too large! please contact Zouxy")
        return 1

    # Samples: the marker is picked by the cluster index in column 0.
    for row in range(numSamples):
        cluster = int(clusterAssment[row, 0])
        plt.plot(dataSet[row, 0], dataSet[row, 1], sample_marks[cluster])

    # Centroids: one larger marker per cluster.
    for idx, style in zip(range(k), centroid_marks):
        plt.plot(centroids[idx, 0], centroids[idx, 1], style, markersize=12)

    plt.show()
Exemplo n.º 33
0
  def render(self, info):
    '''Fill the template drawing with the values from ``info``.

    When ``info`` carries a ``target_position`` entry, the template is
    repositioned first through ``fix_position``: a four-item list is split
    into center (first two items) and size (last two items); a string is
    taken as the path of a dwg file that is inserted as a block, measured
    and deleted again.  Afterwards the ``TextString`` of every text entity
    is evaluated with ``evalute_field``.  The result either replaces the
    text or, for fields that start with ``{{`` and end with ``dwg}}``,
    names a dwg file that is inserted as a block while the text entity is
    deleted.  Finally a ``zoom e`` command is sent to the document.

    Raises:
      InfoKeyError: if a field evaluates to ``None`` or ``''``.
    '''
    self.info = info
    self.app.Visible = True
    if self.info.get('target_position'):
      # With a target_position field present, all objects of the template
      # have to be repositioned before any text is edited.
      target_position = self.info.content['target_position']
      if isinstance(target_position, list) and len(target_position) == 4:
        # target_center and target_size [566371.2180, 4340932.6223, 202.3, 202.3]
        self.fix_position(target_center=target_position[:2],
                          target_size=target_position[2:])
      elif isinstance(target_position, str):
        # Not an existing file as given: look for it in the output folder.
        if not os.path.isfile(target_position):
          target_position = os.path.join(self.output_folder, target_position)

        self.insert_block(dwg_path=target_position)
        # NOTE(review): takes the first BlockReference that is returned;
        # presumably the block inserted just above - confirm.
        last_entity = list(self.entities(kinds='BlockReference'))[0] | puts()
        target_center = self.mid_point(last_entity)
        target_size = self.bounding_box_size(last_entity)
        # The block was only needed for its position and size.
        last_entity.Delete()
        self.fix_position(target_center=target_center, target_size=target_size)



    for en in self.text_entities():
      val = evalute_field(field=en.TextString, info=info)
      if val in (None, ''):
        raise InfoKeyError('无法找到字段的值 {}'.format(en.TextString))

      if en.TextString.startswith('{{') and en.TextString.endswith('dwg}}'):
        # block field syntax should insert dwg block
        # Not an existing file as given: look for it in the output folder.
        if not os.path.isfile(val):
          val = os.path.join(self.output_folder, val)
        self.insert_block(dwg_path=val)
        en.Delete()
      else:
        en.TextString = val
    self.document.SendCommand('zoom e ')
Exemplo n.º 34
0
def test_cad_filler_detect_fields():
  '''Print the required fields of the dwg template, first all, then unique only.'''
  t1 = os.getcwd() + '/test/test_templates/test_{{测试单位}}-宗地图.dwg'
  for unique in (False, True):
    # A new Filler is created for each detection run.
    filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
    filler.detect_required_fields(unique=unique) | puts()
Exemplo n.º 35
0
def test_remove_same_point_polyline():
  '''Remove duplicated vertexes from each selected polyline and print the report.'''
  cad = AutoCAD()
  for pl in cad.selecting():
    outcome = cad.remove_same_points_polyline(pl, threshold=0.0001)
    # The call returns (new polyline, report); only the report is shown.
    plnew, report = outcome
    puts(report)