def handle_paginators(client_name, class_name, service_name, service_path,
                      sidebar_lines):
    try:
        model = botocore.session.get_session().get_paginator_model(client_name)
    except botocore.exceptions.UnknownServiceError:
        return
    paginator_config = model._paginator_config
    if not paginator_config:
        return
    paginator_names = list(paginator_config.keys())
    if not paginator_names:
        return
    paginators_path = f'{service_path}/paginators'
    sidebar_lines.append(f' - [Paginators]({paginators_path})')
    docs_paginators_path = f'docs/{paginators_path}.md'
    example_paginator_name = paginator_names[0]
    paginator_list_items = create_paginator_index(
        docs_paginators_path, client_name, service_name, example_paginator_name)
    for name, paginator in sorted(paginator_config.items()):
        pythonic_name = pythonic.xform_name(name)
        paginator_path = f'{paginators_path}/{pythonic_name}'
        docs_pagination_path = f'docs/{paginator_path}.md'
        list_item, signature, documentation, headline = get_paginator_page(
            name, pythonic_name, client_name, class_name, paginator,
            paginator_path, service_path)
        create_new_file(docs_pagination_path)
        write_lines(docs_pagination_path, [headline, documentation, signature])
        paginator_list_items.append(list_item)
    write_lines(docs_paginators_path, paginator_list_items)
def vl_sift(im, frames=None, orientations=False,
            peak_thresh=None, edge_thresh=None):
    """Compute SIFT keypoints and descriptors using the VLFeat binary.

    Should be thread-safe.
    """
    ut.check(frames is None or frames.shape[1] == 4)
    # frame_fname = '../tmp/vl_frames.frame'
    # im_fname1 = '../tmp/vl_im.png'
    # im_fname2 = '../tmp/vl_im.pgm'
    # out_fname = '../tmp/vl_out.sift'
    frame_fname = ut.make_temp('.frame')
    im_fname1 = ut.make_temp('.png')
    im_fname2 = ut.make_temp('.pgm')
    out_fname = ut.make_temp('.sift')
    #ut.write_lines(frame_fname, ('%f %f %f 0 0 %f' % (pt[0], pt[1], s, s)
    #                             for pt in pts for s in scales))
    ig.save(im_fname1, im)
    # VLFeat's sift binary reads PGM; convert via ImageMagick
    os.system('convert %s %s' % (im_fname1, im_fname2))
    frame_opt = ''
    if frames is not None:
        ut.write_lines(frame_fname, ('%f %f %f %f' % tuple(f) for f in frames))
        frame_opt = '--read-frames %s' % frame_fname
    orientation_opt = '--orientations' if orientations else ''
    peak_opt = ('--peak-thresh %f' % peak_thresh) if peak_thresh is not None else ''
    edge_opt = ('--edge-thresh %f' % edge_thresh) if edge_thresh is not None else ''
    ut.sys_check("%s %s %s %s -o %s %s %s" %
                 (SiftPath, im_fname2, frame_opt, orientation_opt,
                  out_fname, peak_opt, edge_opt))
    sift = read_sift(out_fname)
    os.system('rm %s %s %s' % (im_fname1, im_fname2, out_fname))
    return sift
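# Illustrative use of vl_sift; `ig.load` is assumed to exist alongside the
# `ig.save` used above, and the threshold values are placeholders:
#   im = ig.load('frame.png')
#   feats = vl_sift(im, orientations=True, peak_thresh=0.01)
#   # or with fixed frames, one (x, y, scale, orientation) row per keypoint:
#   feats = vl_sift(im, frames=np.array([[100., 120., 2.5, 0.]]))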
def handle_collections(collections, resource_list_items, resource_path,
                       class_name, service_model, client_name, service_path):
    if collections:
        resource_list_items.extend(
            ['# Collections', 'These are the available collections:'])
    collection: Collection
    for collection in collections:
        name = collection.name
        collection_path = f'{resource_path}/collections/{name}'
        docs_collection_path = f'docs/{collection_path}.md'
        list_item = f'- **[{name}]({collection_path})**'
        resource_list_items.append(list_item)
        resource_name = collection.resource.model.name
        op_name = collection.request.operation
        param_str = get_param_str_from_operation(op_name, service_model)
        collection_list_items = create_collection_page(
            docs_collection_path,
            name,
            resource_name,
            class_name,
            param_str,
            client_name,
            service_path,
            op_name,
            resource_path,
        )
        handle_batch_actions(client_name, collection, collection_list_items,
                             service_path)
        write_lines(docs_collection_path, collection_list_items)
    if collections:
        resource_list_items.append('')  # newline
def handle_client_operation(class_name, client_list_items, client_name,
                            client_path, name, service_model, shapes_path):
    fn_name = pythonic.xform_name(name)
    method_path = f'{client_path}/operations/{fn_name}.md'
    list_item, signature, documentation, headline = get_method_page(
        client_name, class_name, service_model, name, method_path, shapes_path)
    docs_method_path = f'docs/{method_path}'
    create_new_file(docs_method_path)
    write_lines(docs_method_path, [headline, documentation, signature])
    client_list_items.append(list_item)
def _write_llvm_index(self) -> None:
    """Record the list of files and links installed by Clang."""
    flist = []
    install_dir = self.cfg.target_llvm_dir
    for root, _, files in os.walk(install_dir):
        for fname in files:
            full_path = os.path.abspath(os.path.join(root, fname))
            if os.path.isfile(full_path) or os.path.islink(full_path):
                flist.append(os.path.relpath(full_path, install_dir))
    util.write_lines(sorted(flist),
                     os.path.join(self.cfg.build_dir, 'llvm-index.txt'))
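# The `util.write_lines` helper used throughout these build steps is not shown
# in this file. A minimal sketch consistent with how it is called (an iterable
# of strings first, the destination path second); the real implementation may
# differ:
def write_lines(lines, path):
    """Write each string in `lines` to `path`, one per line."""
    with open(path, 'w') as out:
        for line in lines:
            out.write('{}\n'.format(line))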
def handle_shapes(service_model: ServiceModel, class_name, shapes_path):
    top_level_shapes = [(service_model.shape_for(name), class_name)
                        for name in service_model.shape_names]
    if not top_level_shapes:
        return
    docs_shapes_path = f'docs/{shapes_path}'
    create_new_file(docs_shapes_path)
    service_name = get_service_name(service_model)
    all_shapes = find_all_shapes(top_level_shapes)
    shape_docs = [get_shape_doc(shapes_path, shape) for shape in all_shapes]
    write_lines(docs_shapes_path, [f'# {service_name} data types'] + shape_docs)
def brute(docs_path, qrys_path, out_path):
    docs = preprocess(read_lines(docs_path))
    qrys = preprocess(read_lines(qrys_path))

    def process_queries():
        for qry_id, qry in qrys:
            candidates = (doc_id for doc_id, doc in docs if matches(qry, doc))
            retrieved = nlargest(5, candidates)
            yield qry_id, retrieved

    result_strs = imap(to_str, process_queries())
    write_lines(out_path, result_strs)
def handle_waiter(class_name, client_name, name, service_path,
                  waiter_list_items, waiter_model, waiters_path):
    waiter = waiter_model.get_waiter(name)
    pythonic_name = pythonic.xform_name(waiter.operation)
    waiter_path = f'{waiters_path}/{pythonic.xform_name(name)}'
    docs_waiter_path = f'docs/{waiter_path}.md'
    list_item, signature, documentation, headline = get_waiter_page(
        name, pythonic_name, client_name, class_name, waiter_path,
        service_path)
    create_new_file(docs_waiter_path)
    write_lines(docs_waiter_path, [headline, documentation, signature])
    waiter_list_items.append(list_item)
def docs(docs_path, qrys_path, out_path):
    docs = preprocess(read_lines(docs_path))
    qrys = preprocess(read_lines(qrys_path))
    index = inverted_index(docs)

    def process_queries():
        for qry_id, qry in qrys:
            inv_lists = (index[term] for term in qry)
            retrieved = simple_kway_merge(inv_lists)[::-1][:5]
            yield qry_id, retrieved

    result_strs = imap(to_str, process_queries())
    write_lines(out_path, result_strs)
def _write_version_file(cfg: config.Config, version: repos.LLVMBMTC,
                        target_dir: str) -> None:
    """Create VERSION.txt in the install directory."""
    dest = os.path.join(target_dir, 'VERSION.txt')
    toolchain_ver = 'LLVM Embedded Toolchain for Arm ' + cfg.version_string
    if cfg.verbose:
        logging.info('Writing "%s" to %s', toolchain_ver, dest)
    lines = [toolchain_ver, '', 'Components:']
    for name in sorted(version.modules.keys()):
        comp_info = version.modules[name].checkout_info
        lines.append('* {}'.format(comp_info))
        if cfg.verbose:
            logging.info('Writing component %s info: "%s"', name, comp_info)
    util.write_lines(lines, dest)
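# For reference, the resulting VERSION.txt looks roughly like this (contents
# illustrative; the bullet lines come from each module's checkout_info):
#   LLVM Embedded Toolchain for Arm <version string>
#
#   Components:
#   * <checkout info, one line per module, sorted by name>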
def handle_resource_actions(client_name, class_name, list_items, resource_path,
                            service_model, shapes_path, actions):
    if actions:
        list_items.extend(['# Actions', 'These are the available actions:'])
    for action in actions:
        fn_name = action.name
        method_path = f'{resource_path}/operations/{fn_name}.md'
        list_item, signature, documentation, headline = handle_resource_action(
            client_name, class_name, action, method_path, fn_name,
            service_model, shapes_path, resource_path)
        docs_method_path = f'docs/{method_path}'
        create_new_file(docs_method_path)
        write_lines(docs_method_path, [headline, documentation, signature])
        list_items.append(list_item)
    if actions:
        list_items.append('')  # newline
def terms(docs_path, qrys_path, out_path):
    docs = preprocess(read_lines(docs_path))
    qrys = preprocess(read_lines(qrys_path))
    inv_index = inverted_index(docs)

    def process_queries():
        for qry_id, qry in qrys:
            # seed candidates with one term's postings, then intersect the rest
            candidates = set(inv_index[qry.pop()])
            for term in qry:
                candidates.intersection_update(inv_index[term])
            retrieved = nlargest(5, candidates)
            yield qry_id, retrieved

    result_strs = imap(to_str, process_queries())
    write_lines(out_path, result_strs)
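# A minimal driver sketch for the three retrieval variants above (brute, docs,
# terms). It assumes this module runs under Python 2 (imap comes from
# itertools) and that the file paths are placeholders:
if __name__ == '__main__':
    import sys
    docs_path, qrys_path, out_path, mode = sys.argv[1:5]
    run = {'brute': brute, 'docs': docs, 'terms': terms}[mode]
    run(docs_path, qrys_path, out_path)  # e.g. mode = 'terms'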
def handle_client(client, client_name, class_name, service_path, sidebar_lines,
                  shapes_path):
    print('handling client', client_name)
    service_model: ServiceModel = client._service_model
    service_name = get_service_name(service_model)
    service_id = service_model.service_id
    client_path = f'{service_path}/client'
    sidebar_lines.append(f' - [{service_id} client]({client_path})')
    docs_client_path = f'docs/{client_path}.md'
    client_list_items = create_client_index(docs_client_path, client_name,
                                            service_name, class_name)
    for name in service_model.operation_names:
        handle_client_operation(class_name, client_list_items, client_name,
                                client_path, name, service_model, shapes_path)
    write_lines(docs_client_path, client_list_items)
    handle_shapes(service_model, class_name, shapes_path)
def handle_resources(client, resource_name, class_name, service_name,
                     service_path, sidebar_lines):
    try:
        resource: ServiceResource = boto3.resource(resource_name)
    except boto3.exceptions.ResourceNotExistsError:
        return
    service_model: ServiceModel = client._service_model
    service_id = service_model.service_id
    shapes_path = f'{service_path}/data-types.md'
    resource_path = f'{service_path}/resource'
    sidebar_lines.append(f' - [{service_id} resource]({resource_path})')
    docs_resource_path = f'docs/{resource_path}.md'
    resource_list_items = create_resource_index(docs_resource_path,
                                                resource_name, service_name,
                                                class_name)
    resource_model = resource.meta.resource_model
    actions = resource_model.actions
    handle_resource_actions(resource_name, class_name, resource_list_items,
                            resource_path, service_model, shapes_path, actions)
    collections = resource_model.collections
    handle_collections(collections, resource_list_items, resource_path,
                       class_name, service_model, resource_name, service_path)
    sub_resources = resource_model.subresources
    if sub_resources:
        resource_list_items.append('\n')
        resource_list_items.append('# Sub-resources')
        resource_list_items.append('These are the available sub-resources:')
    for sub_resource in sub_resources:
        handle_sub_resource(
            resource_name,
            sub_resource,
            sidebar_lines,
            resource_path,
            resource_list_items,
            class_name,
            service_model,
            shapes_path,
            service_path,
        )
    write_lines(docs_resource_path, resource_list_items)
def write_cfg_files(cfg: config.Config, lib_spec: config.LibrarySpec) -> None:
    """Create target-specific configuration files for a single library
    variant."""
    target = lib_spec.target
    base_cfg_lines = [
        '--target={}'.format(target),
        lib_spec.flags,
        '-fuse-ld=lld',
        '-fno-exceptions -fno-rtti',
    ]
    # No semihosting and no linker script
    nosys_lines = base_cfg_lines + [
        '$@/../lib/clang-runtimes/{}/lib/crt0.o'.format(target),
        '-lnosys',
    ]
    # Semihosting and linker script provided
    rdimon_lines = base_cfg_lines + [
        '-Wl,-T$@/../lib/clang-runtimes/{}/base.ld'.format(target),
        '$@/../lib/clang-runtimes/{}/lib/rdimon-crt0.o'.format(target),
        '-lrdimon',
    ]
    # Semihosting, but no linker script, e.g. to use with the QEMU Arm System
    # emulator
    rdimon_baremetal_lines = base_cfg_lines + [
        '$@/../lib/clang-runtimes/{}/lib/rdimon-crt0.o'.format(target),
        '-lrdimon',
    ]
    cfg_files = [
        ('nosys', nosys_lines),
        ('rdimon', rdimon_lines),
        ('rdimon_baremetal', rdimon_baremetal_lines),
    ]
    for name, lines in cfg_files:
        file_name = '{}_{}.cfg'.format(target, name)
        file_path = os.path.join(cfg.target_llvm_bin_dir, file_name)
        if cfg.verbose:
            logging.info('Writing %s', file_path)
        util.write_lines(lines, file_path)
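# For reference, a toolchain user would pick one of the generated files with
# clang's --config option; the variant name below is illustrative:
#   clang --config armv6m_soft_nofp_rdimon.cfg -o demo demo.c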
def handle_sub_resource(
    resource_name,
    sub_resource,
    sidebar_lines,
    resource_path,
    resource_list_items,
    class_name,
    service_model,
    shapes_path,
    service_path,
):
    sub_resource_name = sub_resource.name
    sub_resource_path = f'{resource_path}/sub-resources/{sub_resource_name}'
    docs_sub_resource_path = f'docs/{sub_resource_path}.md'
    list_item = f'- **[{sub_resource_name}]({sub_resource_path})**'
    resource_list_items.append(list_item)
    sidebar_lines.append(
        f' - [{sub_resource_name} sub-resource]({sub_resource_path})')
    param_str = get_sub_resource_param_str(sub_resource)
    sub_resource_shape_name = sub_resource.resource.model.shape
    sub_resource_list_items = create_sub_resource_index(
        docs_sub_resource_path,
        resource_name,
        class_name,
        sub_resource_name,
        param_str,
        sub_resource_shape_name,
        shapes_path,
    )
    actions = sub_resource.resource.model.actions
    handle_resource_actions(resource_name, class_name, sub_resource_list_items,
                            sub_resource_path, service_model, shapes_path,
                            actions)
    collections = sub_resource.resource.model.collections
    handle_collections(collections, sub_resource_list_items, sub_resource_path,
                       class_name, service_model, resource_name, service_path)
    handle_sub_resource_waiters(sub_resource, sub_resource_list_items,
                                service_path)
    write_lines(docs_sub_resource_path, sub_resource_list_items)
def handle_waiters(client, client_name, class_name, service_name, service_path,
                   sidebar_lines):
    waiter_config = client._get_waiter_config()
    waiter_model = WaiterModel(
        waiter_config) if 'waiters' in waiter_config else None
    if not waiter_model:
        return
    waiters_path = f'{service_path}/waiters'
    sidebar_lines.append(f' - [Waiters]({waiters_path})')
    docs_waiters_path = f'docs/{waiters_path}.md'
    waiter_names = waiter_model.waiter_names
    example_waiter_name = waiter_names[0]
    waiter_list_items = create_waiter_index(docs_waiters_path, client_name,
                                            service_name, example_waiter_name)
    for name in waiter_names:
        handle_waiter(class_name, client_name, name, service_path,
                      waiter_list_items, waiter_model, waiters_path)
    write_lines(docs_waiters_path, waiter_list_items)
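# A minimal driver sketch showing how the handlers above could be wired
# together for one service. The top-level function and the `services/` path
# layout are assumptions based on the parameters used throughout; the real
# entry point is not shown here:
def handle_service(client_name, sidebar_lines):
    client = boto3.client(client_name)
    class_name = type(client).__name__
    service_model = client._service_model
    service_name = get_service_name(service_model)
    service_path = f'services/{client_name}'       # hypothetical layout
    shapes_path = f'{service_path}/data-types.md'  # matches handle_resources
    handle_client(client, client_name, class_name, service_path,
                  sidebar_lines, shapes_path)
    handle_paginators(client_name, class_name, service_name, service_path,
                      sidebar_lines)
    handle_waiters(client, client_name, class_name, service_name,
                   service_path, sidebar_lines)
    handle_resources(client, client_name, class_name, service_name,
                     service_path, sidebar_lines)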
        #print(vec2)
        #stdin.readline()
        cos_dist = (1 - (np.dot(vec1, vec2) /
                         (np.linalg.norm(vec1) * np.linalg.norm(vec2))))
        cos_sim = 1 - cos_dist
        asym_dist = (np.linalg.norm(vec1) - np.linalg.norm(vec2)) / (
            np.linalg.norm(vec1) + np.linalg.norm(vec2))
        score = cos_dist + 1.0 * asym_dist
        #pred_scores.append(score)
        print(cos_sim)
        pred_scores.append(cos_dist)
    return pred_scores


me = modelExecutor()
me.restore_best()
preds = me.evaluate()
if len(valid_pairs) != len(preds):
    raise ValueError("Unexpected number of predictions!")
print("SIMLEX")
correl = stats.spearmanr(preds, [x[2] for x in valid_pairs])
print(correl)
out_lines = [
    valid_pairs[i][0] + "\t" + valid_pairs[i][1] + "\t" + str(preds[i])
    for i in range(len(valid_pairs))
]
util.write_lines(args.preds_path, out_lines)
print("Predictions successfully written to the output file.")
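# Toy check of the two distance terms above (numbers illustrative only):
#   vec1, vec2 = np.array([1., 0.]), np.array([1., 1.])
#   cos_dist  = 1 - 1/sqrt(2)               ~= 0.293
#   asym_dist = (1 - sqrt(2))/(1 + sqrt(2)) ~= -0.172  (vec1 is the shorter vector)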
             and not related_word.endswith('-')
             and related_word[:-1] in word
             and len(related_word) < len(word))
    ]
    if candidate_lemmas != []:
        # get the longest one
        related_roots = get_roots(max(candidate_lemmas, key=len))
        return related_roots


# In[ ]:

root_dict = {}
for word in tqdm(vocab):
    print(word)
    try:
        roots = get_roots(word)
        root_dict[word] = roots
    except:
        print(f"Couldn't find word {word}.")
        continue


# In[ ]:

lines = []
for entry in entries:
    line = '\t'.join(entry + [', '.join(root_dict[entry[1]])])
    lines.append(line)
write_lines('word_freq_list_with_roots.txt', lines)
driver.quit()
def _copy_licenses(cfg: config.Config) -> None:
    """Copy licenses, including third-party licenses; create
    THIRD-PARTY-LICENSES.txt."""
    logging.info('Copying license files')
    _force_copy(cfg.source_dir, cfg.target_llvm_dir, 'LICENSE.txt')
    # Contents of THIRD-PARTY-LICENSES.txt:
    tp_lic_lines = [
        'This product embeds and uses the following pieces of software which '
        'have',
        'additional or alternate licenses:',
    ]
    tp_license_dir = os.path.join(cfg.target_llvm_dir, 'third-party-licenses')
    if os.path.exists(tp_license_dir):
        shutil.rmtree(tp_license_dir)
    os.makedirs(tp_license_dir)

    # Add a component with a single license file. The file is renamed to avoid
    # name clashes.
    def add_license(comp_name, src_path, dest_name):
        tp_lic_lines.append(' - {}: third-party-licenses/{}'.format(comp_name,
                                                                    dest_name))
        dest_path = os.path.join(tp_license_dir, dest_name)
        if cfg.verbose:
            logging.info('Copying %s to %s', src_path, dest_path)
        shutil.copy2(src_path, dest_path)

    # Add a component with multiple license files.
    # License files are not renamed.
    def add_multiple_licenses(comp_name, paths):
        lic_files = []
        for src_path in paths:
            fname = os.path.basename(src_path)
            dest_path = os.path.join(tp_license_dir, fname)
            if cfg.verbose:
                logging.info('Copying %s to %s', src_path, dest_path)
            shutil.copy2(src_path, dest_path)
            lic_files.append('third-party-licenses/{}'.format(fname))
        tp_lic_lines.append(' - {}: {}'.format(comp_name, ', '.join(lic_files)))

    llvm_components = [
        ('LLVM', 'llvm'),
        ('Clang', 'clang'),
        ('lld', 'lld'),
        ('compiler-rt', 'compiler-rt'),
        ('libc++', 'libcxx'),
        ('libc++abi', 'libcxxabi'),
    ]
    for comp_name, comp_dir in llvm_components:
        src_path = os.path.join(cfg.llvm_repo_dir, comp_dir, 'LICENSE.TXT')
        dest_name = '{}-LICENSE.txt'.format(comp_dir.upper())
        add_license(comp_name, src_path, dest_name)

    newlib_lic_names = ['COPYING.NEWLIB', 'COPYING.LIBGLOSS']
    newlib_lic_files = [os.path.join(cfg.newlib_repo_dir, n)
                        for n in newlib_lic_names]
    add_multiple_licenses('Newlib', newlib_lic_files)

    if cfg.is_using_mingw and cfg.copy_runtime_dlls:
        mingw_lic_path = os.path.join(cfg.source_dir, 'mingw-licenses')
        mingw_lic_files = [os.path.join(mingw_lic_path, n)
                           for n in os.listdir(mingw_lic_path)]
        add_multiple_licenses('MinGW runtime DLLs', mingw_lic_files)

    tp_lic_lines += [
        '',
        'Newlib and libgloss licenses refer to the source files of the '
        'corresponding',
        'libraries. To examine the source code please download the source '
        'package of',
        'the LLVM Embedded Toolchain for Arm {} from'.format(cfg.revision),
        'https://github.com/ARM-software/LLVM-embedded-toolchain-for-Arm/'
        'releases.',
    ]
    tp_lic_file = os.path.join(cfg.target_llvm_dir, 'THIRD-PARTY-LICENSES.txt')
    if cfg.verbose:
        logging.info('Creating %s', tp_lic_file)
    util.write_lines(tp_lic_lines, tp_lic_file)
        except:
            pass
    return changed


max_iter = 5
for i in range(max_iter):
    changed = replace_roots_recursive()
    if not changed:
        print("Nothing changed after %d iterations." % i)
        break


# In[ ]:

# word_roots_dict = load_pickle("word_roots_dict.pkl")
str_lines = []
for line in list(word_roots_dict.items()):
    str_lines.append(line[0] + "\t" + ", ".join(line[1]))
write_lines("word_roots_dict.txt", str_lines)
dump_pickle("word_roots_dict.pkl", word_roots_dict)


# In[ ]:

all_roots = list(itertools.chain(*list(word_roots_dict.values())))
freq_dist = nltk.FreqDist(all_roots).most_common()
write_lines("root_freq_dist.txt", freq_dist)
dump_pickle("root_freq_dist.pkl", freq_dist)
# print(freq_dist)


# In[ ]:
if os.path.exists(intermediate_file):
    new_vocab = load_pickle(intermediate_file)
    print(f"Loaded intermediate .pkl file with {len(new_vocab)} entries")
else:
    new_vocab = OrderedDict()


# In[6]:

for i, (word, lst) in tqdm(enumerate(list(vocab.items()))):
    if word in new_vocab:
        continue
    try:
        num_related = get_num_related(word)
    except:
        num_related = -1
    new_vocab[word] = [lst[0], str(num_related)] + lst[2:]
    if i % 25 == 0 and i > 0:  # store an intermediate result every 25 iterations
        dump_pickle("intermediate_importance.pkl", new_vocab)

updated_entries = ['\t'.join([k] + v) for k, v in new_vocab.items()]
write_lines('importance.txt', updated_entries)

driver.close()


# In[ ]:


# In[ ]:
def label_box(seq, root=0, side_len1=None, side_len2=None, side_len3=None,
              y_flip=True, mode='normal'):
    print seq
    if type(seq) == type(''):
        scan = dset.Scan(seq, None)
    else:
        scan = seq
        seq = scan.path
    if mode == 'normal':
        _, _, tracks = dset.read_bundler(scan.bundle_file, scan.full_shape)
        pts = np.array([t[0] for t in tracks])
        proj = scan.project(root, pts)
        w = 1
        pylab.clf()
        im_with_pts = ig.draw_pts(scan.im(root), proj, width=w)
        pylab.imshow(im_with_pts)
        rect = ut.bbox2d(pylab.ginput(2, timeout=-1))
        #rect = (1782.005828476269, 1431.7364696086595, 529.75936719400488, 354.40549542048279)
        print rect
        ok = ut.land(rect[0] <= proj[:, 0], proj[:, 0] <= rect[0] + rect[2],
                     rect[1] <= proj[:, 1], proj[:, 1] <= rect[1] + rect[3])
        pts_in_box = pts[ok]
        thresh = pylab.dist(scan.center(root), scan.center(root + 1)) / 50.
        plane, _ = planefit.fit_plane_ransac(pts_in_box, thresh)
        if plane[1] < 0 and y_flip:
            plane *= -1
        ins = planefit.plane_inliers(plane, pts, thresh)
        pylab.clf()
        colors = np.zeros_like(pts)
        colors[:, 0] = 255
        colors[ins] = (0, 255, 0)
        im_ins = ig.draw_pts(scan.im(root), map(ut.itup, proj),
                             map(ut.itup, colors), width=w)
        pylab.clf()
        pylab.imshow(im_ins)
        if not input('ok? '):
            return
        print 'click 2 points (used to recalibrate the plane)'
        rect = ut.bbox2d(pylab.ginput(2, timeout=-1))
        ok = ut.land(rect[0] <= proj[:, 0], proj[:, 0] <= rect[0] + rect[2],
                     rect[1] <= proj[:, 1], proj[:, 1] <= rect[1] + rect[3])
        pts_in_box = pts[ok]
        print 'plane before', plane
        plane[3] = -np.median(np.dot(pts_in_box, plane[:3]))
        print 'plane after', plane[3]
        if 1:
            print 'hack'
            im_ins = scan.im(root)
        pylab.clf()
        pylab.imshow(im_ins)
        print 'click 3 base points'
        px = pylab.ginput(3, timeout=-1)
        #px = [(2270.2989175686921, 1482.9937552039967), (2297.2764363030801, 1555.8330557868442), (2405.1865112406322, 1550.4375520399667)]

        def backproj(p):
            ray = ut.normalized(
                np.dot(mvg.pixel_ray_matrix(scan.R(root), scan.K(root)),
                       ut.homog(p)))
            c = scan.center(root)
            dist = (-plane[3] - np.dot(c, plane[:3])) / np.dot(ray, plane[:3])
            assert dist >= 0
            pt = c + ray * dist
            print planefit.dist_to_plane(plane, pt[np.newaxis, :])
            return pt

        sc = 1.
        while 1:
            cb = np.array(map(backproj, px))
            v1 = cb[0] - cb[1]
            v2 = cb[2] - cb[1]
            if side_len1 is None:
                side_len1 = 0.5 * (np.linalg.norm(v1) + np.linalg.norm(v2))
            if side_len2 is None:
                side_len2 = side_len1
            if side_len3 is None:
                side_len3 = side_len1
            a1 = sc * side_len1
            a2 = sc * side_len2
            a3 = sc * side_len3
            print 'side1', a1, 'side2', a2, 'side3', a3, 'thresh =', thresh, \
                'v1 =', np.linalg.norm(v1), 'v2 = ', np.linalg.norm(v2)
            R = np.zeros((3, 3))
            cr = ut.normalized(np.cross(v1, plane[:3]))
            cr *= np.sign(np.dot(cr, v2))
            R[0] = a1 * ut.normalized(v1)
            R[1] = a2 * ut.normalized(cr)
            R[2] = a3 * ut.normalized(plane[:3])
            print ut.normax(R, 1)
            mesh_pts = []
            for zi in xrange(2):
                for yi in xrange(2):
                    for xi in xrange(2):
                        mesh_pts.append(cb[1] + R[0] * xi + R[1] * yi + R[2] * zi)
            face_idx = -1 + np.array(
                [[1, 2, 4, 3],
                 np.array([1, 2, 4, 3]) + 4,
                 [1, 2, 2 + 4, 1 + 4],
                 [2, 4, 4 + 4, 2 + 4],
                 [4, 3, 3 + 4, 4 + 4],
                 [3, 1, 1 + 4, 3 + 4]])
            mesh = box.Mesh(face_idx, mesh_pts, texsize=128)
            # show a preview
            scan_ = dset.Scan(seq)
            ig.show([[1 + i, box.draw_faces(mesh, scan_, i, hires=0), scan_.im(i)]
                     for i in [root, root + 1]])
            if input('ok? '):
                box.save_mesh(ut.pjoin(seq, 'cube.mat'), mesh)
                break
            else:
                sc = float(input('scale? '))
                time.sleep(2)
    else:
        mesh = box.load_from_mat(ut.pjoin(seq, 'cube.mat'))
        scan = dset.Scan(seq, use_cams_file=False)
        print 'Already marked as bad:'
        good_cams_file = os.path.join(scan.path, 'good_cams.txt')
        if os.path.exists(good_cams_file):
            inds = map(int, open(good_cams_file, 'r').read().split())
            file_ids = map(scan.file_index, scan.im_files)
            bad = sorted(set(file_ids) - set(inds))
            print '\n'.join(map(str, bad))
        if 1:
            ig.show([[scan.file_index(scan.im_files[frame]),
                      box.draw_faces(mesh, scan, frame, hires=0)]
                     for frame in xrange(scan.length)])
            inp = input('Bad cameras (as string): ')
            if inp != 'skip':
                bad_cams = map(int, inp.split())
                all_idx = map(scan.file_index, scan.im_files)
                good_cams = sorted(set(all_idx) - set(bad_cams))
                ut.write_lines(ut.pjoin(seq, 'good_cams.txt'),
                               map(str, good_cams))
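# Illustrative interactive usage (Python 2; paths depend on the dataset layout):
#   label_box('data/scans/scene-01')                # fit a cube and save cube.mat
#   label_box('data/scans/scene-01', mode='check')  # any non-'normal' mode reviews
#                                                   # the mesh and writes good_cams.txt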
dataset_dict_filename = "Dataset.dict.pkl"
raw_filenames = ["raw_training_text", "raw_valid_text", "raw_test_text"]
suffix = ".txt"


# In[ ]:

dataset_dict = load_pickle(join(path, dataset_dict_filename))
index2token = {index: token for (token, index, _, _) in dataset_dict}
token2index = {token: index for (token, index, _, _) in dataset_dict}
vocab = list(token2index.keys())
indices = list(token2index.values())

vocab_file = join(path, "vocab.txt")
if not exists(vocab_file):
    write_lines(vocab_file, vocab)


# In[ ]:

adversary = Adversary(vocab)


# In[ ]:

for filename in raw_filenames:
    file = join(path, "raw", filename + suffix)
    dialogues = read_lines(file)
    adv_dialogues = []
    for dialogue in tqdm(dialogues):
        adv_dialogue = adversary.apply_strategy(strategy, dialogue)
        adv_dialogues.append(adv_dialogue)
    target_file = join(path, "raw", filename + "." + strategy + suffix)
def write_version_file(cfg: config.Config) -> None:
    """Create VERSION.txt in the install directory."""
    dest = os.path.join(cfg.target_llvm_dir, 'VERSION.txt')
    if cfg.verbose:
        logging.info('Writing "%s" to %s', cfg.version_string, dest)
    util.write_lines([cfg.version_string], dest)
source_test_text.append("") if (len(line) == 3 and len(line[2]) <= upper_threshold_target and len(line[2]) >= lower_threshold_target): source_lst.append(line[0] + [end_token] + line[1] + [end_token]) # concatenate the first two turns target_lst.append(line[2] + [end_token]) if i == 2: source_test_text.append("X: " + ' '.join(line[0])) source_test_text.append("Y: " + ' '.join(line[1])) truncated_datasets.append([source_lst, target_lst]) print(len(datasets[0]), len(truncated_datasets[0][0])) source_test_text_file = data_path + "source_test_text.txt" write_lines(source_test_text_file, source_test_text) # Load word embedding model logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) # model_path = './GoogleNews-vectors-negative300.bin' # model = KeyedVectors.load_word2vec_format(fname=model_path, binary=True) model = KeyedVectors.load_word2vec_format(fname=word2vec, binary=True) """ To extend classifier vocab on a new corpus, say MovieTriples: (0) get vocab_movie_shared, get vocab_politeness (1) get vocab_movie_word2vec by doing (vocab_movie_shared - vocab_politeness) (2) get vocab_movie_new by doing (vocab_movie_freq - vocab_politeness) Note that the classifier doesn't recognize start and end tokens! When doing classification, we should just remove start and end tokens, and then classify