Example #1
def process(arg):
    global not_deleted_list, update_time
    curs = _connect.cursor()
    res = curs.execute("SELECT BookId FROM libbook WHERE NOT (Deleted&1) and FileType = 'fb2' ")
    not_deleted_list = curs.fetchall()
    not_deleted_list = set([i[0] for i in not_deleted_list])
    curs.execute('SELECT * FROM librusec')
    update_time = curs.fetchone()[0]
    for fn in walk(arg):
        for ftype, z_filename, data in read_file(fn, zip_charset='utf-8'):
            process_file(fn, ftype, z_filename, data)
    if options.search_deleted:
        deleted = set()
        for fn in walk(options.search_deleted):
            bookid = base_name(fn, '.fb2.zip')
            try:
                bookid = int(bookid)
            except ValueError:
                continue
            if bookid in not_deleted_list:
                deleted.add((fn, bookid))
        for fn, bookid in deleted:
            for ftype, z_filename, data in read_file(fn, zip_charset='utf-8'):
                ret = process_file(fn, ftype, z_filename, data)
                if ret:
                    print_log('restore deleted:', bookid)
    print
    print 'processed:', stats.total
    print 'passed:', stats.passed
    print 'fixed:', stats.fixed
    print 'errors:', stats.errors
    if options.not_found:
        fd = open(options.not_found, 'w')
        for bookid in not_deleted_list:
            print >> fd, bookid
        fd.close()
Example #2
    def update(self, transect):
        """
        Updates the container data to a profile that intersects the
        transect line.

        Returns nothing. Sets attributes as a side effect.

        Args:
            transect (LineString): A transect line.
        """
        Notice.info("Updating " + self.__class__.__name__)

        # Preprocess
        prepared = prep(transect.buffer(self.settings['buffer']))

        # Get the intersecting points
        points = filter(prepared.contains, self.lookup.keys())

        self.reset_data()
        self.names = []

        for point in points:
            name = self.lookup[point]
            self.names.append(name)
            print name,

            pattern = "^" + name + "_out.las"
            for fname in utils.walk(self.well_dir, pattern):
                # This is a loop but there should only be one matching file.
                well = Well(fname, null_subs=np.nan)
                print well.curves.names
                self.data.append(well)
                self.log_lookup[name] = self.data[-1]

            if not self.log_lookup.get(name):
                print
                self.data.append(None)

            sl_name = getattr(self, 'striplog', None)
            sl = None
            if sl_name and (name == self.feature_well):
                lexicon = Lexicon.default()
                pattern = "^" + name + ".*striplog.las"
                for fname in utils.walk(self.well_dir, pattern):
                    # Load the striplog.
                    sl = Well(fname, lexicon=lexicon, null_subs=np.nan)

                    # Add it to the well
                    self.log_lookup[name].add_striplog(sl.striplog[sl_name],
                                                       sl_name)

            self.coords.append(transect.project(point))
Example #3
    def build(self):
        if platform == 'android':
            perms = ["android.permission.READ_EXTERNAL_STORAGE",
                     "android.permission.WRITE_EXTERNAL_STORAGE",
                     "android.permission.CAMERA",
                     "android.permission.ACCESS_FINE_LOCATION"]
            haveperms = utils.acquire_permissions(perms)
            self.gps = plyer.gps
            self.gps.configure(self.gps_onlocation, self.gps_onstatus)
            import my_camera
            self.camera = my_camera.MyAndroidCamera()

        Window.bind(on_keyboard=self.popScreen)
        os.makedirs(utils.getDataDir() + "/images", exist_ok=True)
        self.markerMap = {}
        self.settings_cls = SettingsWithSidebar
        self.curMarker = None
        self.relocated = 0
        self.dialog = None

        self.baseConfig = config.Config()
        self.error = self.baseConfig.getErrors()
        if self.error:
            Clock.schedule_once(self.show_error, 2)
        self.store = JsonStore("base.json")
        self.root = Page()
        try:
            base = self.store.get("base")["base"]
            self.baseConfig.getBase(base)
        except Exception:
            base = self.baseConfig.getNames()[0]
        print("base", base)
        print("----------- /data/user/0/de.adfcmuenchen.abstellanlagen")
        utils.walk("/data/user/0/de.adfcmuenchen.abstellanlagen")
        # print("----------- cwd", os.getcwd())
        # utils.walk(".")
        # print("------------getDataDir", utils.getDataDir())
        # utils.walk(utils.getDataDir())
        # print("------------getExternalFilesDir", utils.getExternalFilesDir())
        # utils.walk(utils.getExternalFilesDir())
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.future = None

        laststored = self.getConfigValue("gespeichert")
        if not laststored:
            self.setConfigValue("gespeichert", time.strftime("%Y.%m.%d %H:%M:%S"))
        self.useGoogle = bool(self.getConfigValue("useGoogle"))
        self.setup(base)
        return self.root
Example #4
  def test_walk(self):
    data = {
      'foo': 'bar',
      'bam': {
        'foo': 'bar2',
        'foo2': ['bar3', 'bar4'],
      }
    }

    actual = []
    callback = lambda item, key, node: actual.append(item)
    utils.walk(data, callback)

    expected = ['bar', 'bar2', 'bar3', 'bar4']
    self.assertItemsEqual(expected, actual)
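
A minimal sketch of the walk() these tests assume — recursively visit every leaf value and invoke callback(item, key, node); the signature is inferred from the lambda above, so treat it as hypothetical:

def walk(node, callback, key=None, parent=None):
    # Recurse into dicts and lists; invoke the callback on each leaf value.
    if isinstance(node, dict):
        for k, v in node.items():
            walk(v, callback, k, node)
    elif isinstance(node, (list, tuple)):
        for i, v in enumerate(node):
            walk(v, callback, i, node)
    else:
        callback(node, key, parent)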
Example #5
def cleanDirs(root, dirNamesList):
    import shutil

    folders = walk(root, targetDirs=dirNamesList).get("dirs")
    for dirSet in folders:
        path = os.path.join(root, *dirSet)
        shutil.rmtree(path, ignore_errors=True)
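
A hypothetical call site, assuming walk() returns a dict like {"dirs": [path-component tuples]} as used above:

# Remove every "tmp" and "cache" directory found anywhere under ./build.
cleanDirs("./build", ["tmp", "cache"])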
Example #6
from goto import goto, label  # third-party `goto` module; provides the label/goto statements

def CDNF():
	# Bshouty's CDNF learning algorithm, driven by equivalence queries.
	# EQ, invjoin, evaluate, walk and MDNF are assumed to be defined elsewhere.
	t = 0
	H, S, a = [], [], []
	if EQ(True) == True:
		return True
	else:
		u = EQ(True)

	label .zero
	t = t + 1
	H.append(False)  # new (empty) term of the hypothesis
	S.append([])     # basis assignments backing the new term
	a.append(u)      # counterexample that spawned the term

	label .one
	if EQ(invjoin(H, 'and')) == True:
		return invjoin(H, 'and')
	else:
		u = EQ(invjoin(H, 'and'))
	I = []
	for i in range(0, len(H)):
		if evaluate(u, H[i]) == False:
			I.append(i)
	if I == []:
		goto .zero
	for i in I:
		mu = walk(u, a[i])
		S[i].append(mu ^ a[i])  # store the walked counterexample
	for i in range(0, t):
		H[i] = MDNF(S[i])
	goto .one
Example #7
def extract_dataset(dataset, extractor="arcface", gpu=-1):
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = os.path.join(os.path.abspath(""), "images", dataset)

    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))
    features_flip = np.zeros((file_cnt, 513))

    image_cnt = 0
    for subject_id, subject in enumerate(os.listdir(dataset_path)):
        draw_progress(dataset + " " + extractor,
                      float(image_cnt + 1) / file_cnt)

        for image in os.listdir(os.path.join(dataset_path, subject)):
            image = cv2.imread(os.path.join(dataset_path, subject, image))

            feature = face.extract(image)
            features[image_cnt, :] = np.append(feature, subject_id + 1)

            feature_flip = face.extract(cv2.flip(image, 1))
            features_flip[image_cnt, :] = np.append(
                feature_flip, subject_id + 1)

            image_cnt += 1

    return features, features_flip
Example #8
def extractMarksThroughDataset(datasetPath,
                               categories=None,
                               subcategories=None,
                               parallel=False,
                               threads=16):
    # cleanOldMarks(datasetPath)
    frames = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frames = filterFolders(frames, categories, subcategories)

    if parallel:
        threads = min(threads, mp.cpu_count())
    else:
        threads = 1

    threadsList = []
    with mp.Pool(threads) as pool:
        for dirsSet in frames:
            dirsSet = dirsSet[:-1]
            categoryDir = os.path.join(datasetPath, *dirsSet)

            threadsList.append(
                pool.apply_async(extractMarks, args=(categoryDir, )))

        for r in threadsList:
            r.get()
Example #9
def main():
    # parsing command-line options
    global options, markup
    option_list = [
        make_option("-o", "--out", dest="outfile",
                    help="write result to FILE", metavar="FILE"),
        make_option("-m", "--html", dest="html", action="store_true",
                    default=False, help="output in HTML"),
        make_option("-q", "--quiet", dest="quiet", action="store_true",
                    default=False, help="show errors only"),
        ]
    parser = OptionParser(option_list=option_list,
                          usage="usage: %prog [options] files|dirs",
                          version="%prog "+prog_version)
    options, args = parser.parse_args()
    LogOptions.level = 0                # show all errors
    if options.html:
        markup = html_markup
        LogOptions.br = '<br />'
        LogOptions.escape = True
    if options.outfile:
        LogOptions.outfile = open(options.outfile, 'at')
    else:
        LogOptions.outfile = sys.stdout
    errors = 0
    for f in walk(args):
        errors += process_file(f)
    sys.exit(errors)
Example #10
def updateCategoriesIndices(datasetPath, categories):
    from utils import walk, makeJSONname
    from verifier import getFullCategory

    marks = walk(datasetPath, targetFiles=makeJSONname(const.marks)).get("files")

    for mrk in marks:
        try:
            marksPath = os.path.join(datasetPath, *mrk)

            category, subcategory = mrk[-3:-1]
            fullCategory = getFullCategory(category, subcategory)

            if fullCategory not in categories:
                continue

            marksData = openJsonSafely(marksPath)

            for f, value in marksData.items():
                fullCategory = value[const.fullCategory]
                value[const.ctgIdx] = categories.index(fullCategory)

            json.dump(marksData, open(marksPath, "w"), indent=3)
            print(f"{Fore.BLUE}JSON file {marksPath} has been fixed{Style.RESET_ALL}")
        except Exception as e:
            print(e)
Example #11
    def test_walk_ignorable(self):
        os.chdir(sys.path[0])
        self.assertEqual(sorted([
            Path('ignorable/b.txt'),
            Path('ignorable/c/e.txt'),
            Path('ignorable/f/g.txt')
        ]), sorted(list(walk(Path('ignorable'), []))))
Example #12
def check(args: argparse.Namespace) -> None:
    """Verify that index matches files, print out any mismatches

    :param args: must have attr cold_dir: str
    """
    cold_dir = Path(args.cold_dir)
    assert cold_dir.is_dir(), "cold_dir not found!"
    index = Index(cold_dir)
    fail_count = 0

    # Set up progress bar
    total = sum([(cold_dir / p).stat().st_size if
                 (cold_dir / p).exists() else 0 for p in index.keys()])
    with tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # Check that index is correct
        for p, h in index.items():
            if h != hash_file(cold_dir / p, pbar):
                print(f"Verification failed: '{p}'.", file=sys.stderr)
                fail_count += 1
        # Additionally check that index is complete
        for file in walk(
                cold_dir,
                [PathAwareGitWildMatchPattern('index.txt', cold_dir)]):
            rel_path: PurePath = file.relative_to(cold_dir)
            if rel_path not in index:
                print(f"File missing from index: '{rel_path}'.",
                      file=sys.stderr)
                fail_count += 1

    if fail_count == 0:
        print("OK: Data is intact!")
    else:
        print(f"FAIL: There were {fail_count} failures!")
Example #13
    def process_dee(self, dir_path, ext) -> List[Dict[str, Any]]:
        # Process dependees (libraries) to get function implementations
        indexes = []
        #_, nwo = remap_nwo(nwo)
        #if nwo is None:
        #return indexes

        #tmp_dir = download(nwo)
        files = walk(dir_path, ext)
        # files = glob.iglob(tmp_dir.name + '/**/*.{}'.format(ext), recursive=True)
        sha = None

        for f in files:
            definitions = self.get_function_definitions(f)
            if definitions is None:
                continue
            '''
            if sha is None:
                sha = get_sha(dir, nwo)
            '''
            nwo, path, functions = definitions
            indexes.extend((self.extract_function_data(func, nwo, path, sha)
                            for func in functions
                            if len(func['function_tokens']) > 1))
        return indexes
Example #14
def sync(args: argparse.Namespace) -> None:
    """Prompt user for each change towards getting hot dir, cold dir and cold index synced

    :param args: must have attrs hot_dir:str and cold_dir: str
    """
    hot_dir, cold_dir = Path(args.hot_dir), Path(args.cold_dir)
    assert hot_dir.is_dir(), "hot_dir not found!"
    assert cold_dir.is_dir(), "cold_dir not found!"

    index = Index(cold_dir)
    # inv_index = defaultdict(list)
    # for k, v in index.items():
    #     inv_index[v].append(k)

    # Set up progress bar
    total = 0
    for file in itertools.chain(walk(hot_dir, []), walk(cold_dir, [])):
        total += file.stat().st_size
    with tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # Find all changes required
        changes = walk_trees(PurePath(), index, hot_dir, cold_dir, [], [],
                             pbar)

        # TODO: calculate reverse indices recursively for added and removed
        # TODO: find all moved. Can be also moved into added or out from removed
        # for file in hot_only:
        #     h = hash_file(os.path.join(args.hot_dir, file), pbar)
        #     if h in inv_index and set(cold_only) & set(inv_index[h]):
        #         print(set(cold_only) & set(inv_index[h]), "moved to", file)

    # Confirm each change with the user
    changes.sort(key=attrgetter('name'))
    actions = []
    action_total = 0
    for change in changes:
        if yesno(str(change), default=False):
            actions.append(change)
            action_total += change.size

    # Carry out all confirmed changes
    with tqdm(total=action_total, unit="B", unit_scale=True) as pbar:
        for change in actions:
            change.apply(args.hot_dir, args.cold_dir, index)
            pbar.update(change.size)

    index.store()
    print("OK: Done!")
Example #15
    def sign(self):
        """return sign in the form {relpath: (origin layer, SHA256)}
        """
        sigs = {}
        for entry, sig in utils.walk(self.target, utils.sign, kind="files"):
            relpath = entry.relpath(self._target.directory)
            sigs[relpath] = (self.interface.url, "static", sig)
        return sigs
Example #16
def prepareVideo(rpath, wpath):
    videos = walk(rpath, targetExtensions=Extensions.videos()).get("extensions")

    for vset in videos:
        vpath = os.path.join(rpath, *vset)

        for frame in generateFrames(vpath):
            cv2.imwrite(os.path.join(wpath, "negative-{}{}".format(uuid.uuid1(), Extensions.jpg)), frame)
Example #17
def create_plist():
    with open(e('${GUI_DESTDIR}/gui-plist'), 'w') as f:
        for i in walk('${GUI_DESTDIR}'):
            if not os.path.isdir(e('${GUI_DESTDIR}/${i}')):
                f.write(e('/usr/local/www/gui/${i}\n'))

        with open(e('${GUI_STAGEDIR}/custom-plist')) as c:
            f.write(c.read())
Example #18
    def process_dent(self, nwo, ext, library_candidates) -> Tuple[List[Dict[str, Any]], List[Tuple[str, str]]]:
        # Process dependents (applications) to get function calls
        dents = []
        edges = []
        _, nwo = remap_nwo(nwo)
        if nwo is None:
            return dents, edges

        tmp_dir = download(nwo)
        files = walk(tmp_dir, ext)
        sha = None

        for f in files:
            context_and_calls = self.get_context_and_function_calls(f)
            if context_and_calls is None:
                continue
            if sha is None:
                sha = get_sha(tmp_dir, nwo)

            nwo, path, context, calls = context_and_calls
            libraries = []
            for cxt in context:
                if type(cxt) == dict:
                    libraries.extend([v.split('.')[0] for v in cxt.values()])
                elif type(cxt) == list:
                    libraries.extend(cxt)

            match_scopes = {}
            for cxt in set(libraries):
                if cxt in library_candidates:
                    match_scopes[cxt] = library_candidates[cxt]

            for call in calls:
                for depended_library_name, depended_library_functions in match_scopes.items():
                    for depended_library_function in depended_library_functions:
                        # Other potential filters: len(call['identifier']) > 6 or len(call['identifier'].split('_')) > 1
                        if (call['identifier'] not in self.language_parser.STOPWORDS and
                            ((depended_library_function['identifier'].split('.')[-1] == '__init__' and
                              call['identifier'] == depended_library_function['identifier'].split('.')[0]) or
                             ((len(call['identifier']) > 9 or
                               (not call['identifier'].startswith('_') and len(call['identifier'].split('_')) > 1)) and
                              call['identifier'] == depended_library_function['identifier'])
                            )):
                            dent = {
                                'nwo': nwo,
                                'sha': sha,
                                'path': path,
                                'language': self.language,
                                'identifier': call['identifier'],
                                'argument_list': call['argument_list'],
                                'url': 'https://github.com/{}/blob/{}/{}#L{}-L{}'.format(nwo, sha, path,
                                                                                         call['start_point'][0] + 1,
                                                                                         call['end_point'][0] + 1)
                            }
                            dents.append(dent)
                            edges.append((dent['url'], depended_library_function['url']))
        return dents, edges
Example #19
    def sign(self):
        """return sign in the form {relpath: (origin layer, SHA256)}
        """
        sigs = {}
        for entry, sig in utils.walk(self.target_file.dirname(),
                                     utils.sign, kind="files"):
            relpath = entry.relpath(self._target.directory)
            sigs[relpath] = (self.current.url, "dynamic", sig)
        return sigs
Example #20
def find_scc(G):
    GT = tr(G)
    sccs, seen = [], set()
    res = dfs_topsort(G)
    for u in res:
        if u in seen:
            continue
        C = walk(GT, u, seen)
        seen.update(C)
        sccs.append(C)

    return sccs
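
find_scc is the second pass of Kosaraju's algorithm: tr() transposes the graph, dfs_topsort() orders the vertices, and walk() gathers one strongly connected component per unseen start vertex. A plausible walk() for the graph-as-dict-of-sets representation — a sketch of the assumed helper, traversing from s while skipping vertices already in S:

def walk(G, s, S=set()):
    # Traverse G from s, avoiding vertices in S. P maps each visited
    # vertex to its predecessor, so P's keys form the reachable set.
    P, Q = dict(), set()
    P[s] = None
    Q.add(s)
    while Q:
        u = Q.pop()
        for v in G[u].difference(P, S):
            Q.add(v)
            P[v] = u
    return P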
Example #21
    def __init__(self, well_dir, params):

        # First generate the parent object.
        super(LogContainer, self).__init__(params)

        self.well_dir = well_dir
        self.reset_all()

        for shp in utils.walk(well_dir, '\\.shp$'):
            with fiona.open(shp, "r") as wells:
                for well in wells:
                    geom = shape(well['geometry'])
                    self.lookup[geom] = well["properties"]['name']
Example #22
def actualizeInfoWithFrames(datasetPath):
    print("\nActualizing info...")
    actualInfo = {}
    os.makedirs(os.path.dirname(Path.actualInfo), exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames)
    frames = frames.get("dirs")

    for idx, dirsList in enumerate(frames):
        dirsList = dirsList[:-1]

        fullpath = os.path.join(datasetPath, *dirsList)
        images = walk(fullpath, targetExtensions=Extensions.images()).get("extensions")  # TODO: some images may not contain a category

        putNested(dictionary=actualInfo, keys=dirsList, value=len(images))
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=len(images))

        print("\r{:.1f}% of work has been done".format((idx + 1) / len(frames) * 100), end="")

    print()
    json.dump(actualInfo, open(Path.actualInfo, "w"), indent=3)
Example #23
def extract_dataset(dataset: str,
                    extractor: str = "arcface",
                    gpu: int = -1) -> np.ndarray:
    """Extract feature vectors of each image within a dataset.
    Return array conatining all extracted features.

    Parameters
    ----------
    dataset: str
        Dataset to extract features from. Examples would be gtdb or lfw
    extractor: str = "arcface"
        Model to use for feature extraction. Currently supported options are
        arcface/facenet
    gpu: int = -1
        GPU id to use for feature extraction and preprocessing models. If -1
        is given, CPU is used rather than GPU

    Returns
    -------
    np.ndarray
        Array of features corresponding each image from a dataset. Subject ids
        are appended to end of feature vectors. Resulting output will be of
        shape (number of dataset images)x513

    """
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = f"images/{dataset}"

    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))

    subjects = sorted(os.listdir(dataset_path),
                      key=lambda subject: subject.lower())

    image_cnt = 0
    for subject_id, subject in enumerate(subjects):
        progress_bar(f"{dataset} {extractor}", (image_cnt + 1) / file_cnt)

        for image in os.listdir(f"{dataset_path}/{subject}"):
            image = cv2.imread(f"{dataset_path}/{subject}/{image}")

            feature = face.extract(image)
            features[image_cnt, :] = np.append(feature, subject_id + 1)

            image_cnt += 1

    return features
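
A hypothetical usage following the docstring (the dataset name and GPU id are illustrative):

features = extract_dataset("lfw", extractor="arcface", gpu=0)
print(features.shape)  # (number of images, 513): 512-d feature plus subject id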
Example #24
File: recovery.py  Project: jn0/fb2utils
def main():
    # parsing command-line options
    global options
    option_list = [
        make_option("-o", "--out", dest="outfile",
                    help="write result to FILE", metavar="FILE"),
        make_option("-d", "--dest-dir", dest="dest_dir",
                    help="save result files to DIR", metavar="DIR"),
        make_option("-z", "--zip", dest="zip", action="store_true",
                    default=False, help="zip result file"),
        make_option("-n", "--no-zip", dest="nozip", action="store_true",
                    default=False, help="don't zip result file"),
        make_option("-c", "--check-only", dest="check_only",
                    action="store_true", default=False,
                    help="check only, do not save result"),
        make_option("-f", "--force", dest="force", action="store_true",
                    default=False, help="don't validate XML"),
##         make_option("-b", "--pre-fb2-lint", dest="pre_fb2lint",
##                     action="store_true", default=False,
##                     help="pre process FB2 validation"),
##         make_option("-a", "--post-fb2-lint", dest="post_fb2lint",
##                     action="store_true", default=False,
##                     help="post process FB2 validation"),
        make_option("-e", "--output-encoding", dest="output_encoding",
                    default = 'utf-8', metavar="ENC",
                    help="fb2 output encoding"),
        make_option("-v", "--verbose", dest="verbose", action="store_true",
                    default=False, help="more info"),
        make_option("-q", "--quiet", dest="quiet", action="store_true",
                    default=False, help="less info"),
        ]
    parser = OptionParser(option_list=option_list,
                          usage="usage: %prog [options] files|dirs",
                          version="%prog "+prog_version)
    options, args = parser.parse_args()

    if options.verbose:
        LogOptions.level = 0
    elif options.quiet:
        LogOptions.level = 2

    starttime = time.time()
    # walk a files
    for filename in walk(args):
        process_file(filename)
    # print stats
    if options.verbose:
        et = time.time() - starttime
        print 'elapsed time: %.2f secs' % et
        print 'average: %.3f secs' % (et/total_files)
Example #25
def fixJsons():
    import os
    from utils import walk, readLines

    categories = readLines(Path.categories)
    jsons = walk(Path.dataset, targetFiles="marks.json").get("files")

    for i, jsn in enumerate(jsons):
        print(f"\rProcessing {i} json file", end="")

        path = os.path.join(Path.dataset, *jsn)
        marks = json.load(open(path, "r"))
        for name, items in marks.items():
            ctgIdx = categories.index(items[Constants.fullCategory])
            items[Constants.ctgIdx] = ctgIdx

        json.dump(marks, open(path, "w"), indent=4)
Example #26
    def plan_layers(self, layers, output_files):
        config = ComposerConfig()
        config = config.add_config(
            layers["layers"][0] / ComposerConfig.DEFAULT_FILE, True)

        for i, layer in enumerate(layers["layers"]):
            log.info("Processing layer: %s", layer.url)
            if i + 1 < len(layers["layers"]):
                next_layer = layers["layers"][i + 1]
                config = config.add_config(
                    next_layer / ComposerConfig.DEFAULT_FILE, True)
            list(e for e in utils.walk(layer.directory,
                                       self.build_tactics,
                                       current=layer,
                                       config=config,
                                       output_files=output_files))
        plan = [t for t in output_files.values() if t]
        return plan
Example #27
def main():
    LogOptions.level = 0
    Stat.starttime = time.time()
    for f in walk(sys.argv[1:]):
        process_file(f)
    # print stats
    def p_stat(msg, v):
        print '%s: %d (%d%%)' % (msg, v, round(v*100./Stat.total))
    print 'total files:', Stat.total
    p_stat('not an xml file', Stat.not_xml)
    p_stat('sax parsing error', Stat.sax_errors)
    p_stat('dom parsing error', Stat.xml_errors)
    p_stat('fb2 schema violation', Stat.fb2_errors)
    p_stat('inconsistent fb2 file', Stat.extra_errors)
    p_stat('good files', Stat.good)
    et = time.time() - Stat.starttime
    print 'elapsed time: %.2f secs' % et
    print 'average: %.3f secs' % (et/Stat.total)
Example #28
File: main.py  Project: ownport/scrapets
def content(ctx, **opts):
    ''' content processing
    '''
    def process(action, path, expr=None, profile=None):

        _content = content.Content('content', codecs.open(path, 'r', 'utf-8').read())

        if expr:
            method, expr = expr.split(':', 1)
            if method not in ('xpath', 'css'):
                utils.show_help(ctx)

            if action == 'select':
                print json.dumps(_content.select(expr, method).extract())
            elif action == 'remove':
                print json.dumps(_content.remove(expr, method).extract())

        if profile:
            result = _content.process(profile)
            if isinstance(result, content.Content):
                print json.dumps(result.extract())
            elif isinstance(result, dict):
                print json.dumps(result)
            else:
                raise RuntimeError('Unknown result type, %s' % type(result))

    if not opts['path']:
        utils.show_help(ctx)
    if opts['action'] in ('select', 'remove') and not opts['expr']:
        utils.show_help(ctx)

    import content

    _profile = None
    if opts['profile'] and os.path.exists(opts['profile']):
        _profile = codecs.open(opts['profile'], 'r', 'utf-8').read()

    for path in utils.walk(opts['path']):
        try:
            process(opts['action'], path, expr=opts['expr'], profile=_profile)
        except Exception, err:
            print >> sys.stderr, "[ERROR] Cannot process the file, %s. Error: %s" % (path, err)
Example #29
def extractCropsThroughDataset(datasetPath,
                               extractionPath=None,
                               categories=None,
                               subcategories=None,
                               extension=Extensions.png,
                               params=None,
                               parallel=True,
                               threads=16):

    frames = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frames = filterFolders(frames, categories, subcategories)

    if parallel:
        threads = min(threads, mp.cpu_count())
    else:
        threads = 1

    globalIdx = 0
    threadsList = []
    with mp.Pool(threads) as pool:
        for dirsSet in frames:
            dirsSet = dirsSet[:-1]
            categoryDir = os.path.join(datasetPath, *dirsSet)

            length = len(
                openJsonSafely(
                    os.path.join(categoryDir, makeJSONname(const.marks))))

            threadsList.append(
                pool.apply_async(extractCrops,
                                 args=(categoryDir, ),
                                 kwds={
                                     "extractionPath": extractionPath,
                                     "extension": extension,
                                     "params": params,
                                     "globalIdx": globalIdx
                                 }))

            globalIdx += length

        for r in threadsList:
            r.get()
Example #30
File: main.py  Project: ownport/scrapets
def linkextract(ctx, **opts):
    ''' link extractor
    '''
    if not opts['path']:
        utils.show_help(ctx)

    import re
    import extract

    URLFILTER = None
    if opts['filter']:
        URLFILTER = re.compile(opts['filter'])

    le = extract.LinkExtractor()
    for path in utils.walk(opts['path']):
        try:
            le.feed(open(path).read().decode('utf-8'))
            for link in filter(lambda u: URLFILTER.search(u) if URLFILTER else True, le.links):
                print link
        except Exception, err:
            print >> sys.stderr, "[ERROR] Cannot process the file, %s" % (path,)
Example #31
def extract_dataset(dataset, extractor="arcface", gpu=-1):
    if extractor == "arcface":
        face = ArcFace(gpu)
    else:
        face = FaceNet(gpu)

    dataset_path = os.path.join(os.path.abspath(""), "images",
                                dataset)  #dataset will be "lfw" or "gtdb"

    file_cnt = len(walk(dataset_path))
    features = np.zeros((file_cnt, 513))
    #features_flip = np.zeros((file_cnt, 513)) #omitted by Kai

    image_cnt = 0
    subjects = sorted(os.listdir(dataset_path),
                      key=lambda subject: subject.lower())  # case-insensitive sort
    for subject_id, subject in enumerate(subjects):
        progress_bar(dataset + " " + extractor,
                     float(image_cnt + 1) / file_cnt)

        for image in os.listdir(os.path.join(dataset_path, subject)):
            image = cv2.imread(os.path.join(dataset_path, subject, image))

            feature = face.extract(
                image
            )  #the return value of extract here should be a row vector of 512 elements
            features[image_cnt, :] = np.append(
                feature, subject_id + 1
            )  #the return value of append here should be a row vector of 513 elements

            #feature_flip = face.extract(cv2.flip(image, 1)) #omitted by Kai
            #features_flip[image_cnt, :] = np.append(feature_flip, subject_id + 1) #omitted by Kai

            image_cnt += 1

    #return features, features_flip #omitted by Kai
    return features
Example #32
    def __call__(self):
        # copy the entire tree into the
        # hooks/relations/<interface>
        # directory
        log.debug("Copying Interface %s: %s", self.interface.name, self.target)
        # Ensure the path exists
        if self.target.exists():
            # XXX: fix this to do actual updates
            return
        ignorer = utils.ignore_matcher(self.config.ignores)
        for entity, _ in utils.walk(self.interface.directory,
                                    lambda x: True,
                                    matcher=ignorer,
                                    kind="files"):
            target = entity.relpath(self.interface.directory)
            target = (self.target / target).normpath()
            target.parent.makedirs_p()
            entity.copy2(target)
        init = self.target / "__init__.py"
        if not init.exists():
            # ensure we can import from here directly
            init.touch()
Example #33
def actualizeInfoWithJsons(datasetPath):
    print("\nActualizing info...")
    actualInfo = {}
    os.makedirs(os.path.dirname(Path.actualInfo), exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames)
    frames = frames.get("dirs")

    for idx, dirsList in enumerate(frames):
        dirsList = dirsList[:-1]

        fullpath = os.path.join(datasetPath, *dirsList, makeJSONname(const.marks))
        marks = json.load(open(fullpath, "r"))

        putNested(dictionary=actualInfo, keys=dirsList, value=len(marks))
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=len(marks))

        print("\r{:.1f}% of work has been done".format((idx + 1) / len(frames) * 100), end="")

    print()
    json.dump(actualInfo, open(Path.actualInfo, "w"), indent=3)
Example #34
File: plugins.py  Project: nsubiron/nscmd
def load_plugins(directories, interface):
    strip_ext = lambda f: os.path.splitext(os.path.basename(f))[0]
    is_valid = lambda obj: inspect.isclass(obj) and issubclass(obj, interface)
    for directory in directories:
      for dirname, _, filenames in utils.walk(directory, ['*.py','*.pyc']):
        added = False
        if dirname not in sys.path:
          sys.path.insert(0, dirname)
          added = True
        for module_name in set(strip_ext(f) for f in filenames):
          module_info = None
          try:
            module_info = imp.find_module(module_name, [dirname])
            # This does a reload if already imported.
            module = imp.load_module(module_name, *module_info)
            for name, declaration in inspect.getmembers(module, is_valid):
              yield PluginProxy(name, declaration)
          except Exception:
            pass
          finally:
            if module_info:
              module_info[0].close()
        if added:
          sys.path.remove(dirname)
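
A hypothetical call site: load_plugins yields one PluginProxy per class found under the given directories that subclasses the interface (the directory name and Plugin base class here are illustrative):

for proxy in load_plugins(["plugins"], Plugin):
    print(proxy.name)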
Example #35
def augmentDatasetWithRepeats(augmentationName,
                              augmentations,
                              imageExtension,
                              repeats=1,
                              params=None):
    actualInfo = downloadActualInfo().get(const.original, {})

    target = getTargetCount(actualInfo,
                            targetType="max")  # not really the best choice, in general

    path = os.path.join(Path.dataset, const.original)
    keys = walk(path, targetDirs=const.frames).get("dirs")

    for set_ in keys:
        set_ = set_[:-1]
        count = getNested(dictionary=actualInfo, keys=set_, default=0)

        category, subcategory = set_
        categoryPath = os.path.join(path, category, subcategory)

        if count == 0:
            print(
                f"{Fore.RED}Update actual info for {categoryPath} {Style.RESET_ALL}"
            )
            continue

        multiplier = int(target // count)
        ctgRepeats = repeats * multiplier

        augmentCategoryWithRepeats(
            categoryPath=categoryPath,
            fullCategory=getFullCategory(category, subcategory),
            augmentPath=os.path.join(Path.dataset, augmentationName),
            augmentations=augmentations,
            extension=imageExtension,
            repeats=ctgRepeats,
            params=params)
Example #36
def inspect(charm):
    tw = utils.TermWriter()
    manp = charm / ".composer.manifest"
    comp = charm / "composer.yaml"
    if not manp.exists() or not comp.exists():
        return
    manifest = json.loads(manp.text())
    composer = yaml.load(comp.open())
    a, c, d = utils.delta_signatures(manp)

    # ordered list of layers used for legend
    layers = list(manifest["layers"])

    def get_depth(e):
        rel = e.relpath(charm)
        depth = len(rel.splitall()) - 2
        return rel, depth

    def get_suffix(rel):
        suffix = ""
        if rel in a:
            suffix = "+"
        elif rel in c:
            suffix = "*"
        return suffix

    def get_color(rel):
        # name of layer this belongs to
        color = tw.term.normal
        if rel in manifest["signatures"]:
            layer = manifest["signatures"][rel][0]
            layer_key = layers.index(layer)
            color = getattr(tw, theme.get(layer_key, "normal"))
        else:
            if entry.isdir():
                color = tw.blue
        return color

    tw.write("Inspect %s\n" % composer["is"])
    for layer in layers:
        tw.write(
            "# {color}{layer}{t.normal}\n", color=getattr(tw, theme.get(layers.index(layer), "normal")), layer=layer
        )
    tw.write("\n")
    tw.write("{t.blue}{target}{t.normal}\n", target=charm)

    ignorer = utils.ignore_matcher(config.DEFAULT_IGNORES)
    walk = sorted(utils.walk(charm, get_depth), key=lambda x: x[1][0])
    for i in range(len(walk) - 1):
        entry, (rel, depth) = walk[i]
        nEnt, (nrel, ndepth) = walk[i + 1]
        if not ignorer(rel):
            continue

        tw.write(
            "{prefix}{layerColor}{entry} " "{t.bold}{suffix}{t.normal}\n",
            prefix=get_prefix(walk, i, depth, ndepth),
            layerColor=get_color(rel),
            suffix=get_suffix(rel),
            entry=rel.name,
        )
Example #37
                'coffee_rings': 0,
                'distort': False,
                'scribble': False,
                }

    for k, v in defaults.items():
        if cfg.get(k) is None:
            cfg[k] = v

    cfg['outfile'] = args.out

    # Gather files to work on, then go and do them.
    if os.path.isfile(target):
        Notice.hr_header("Processing file: {}".format(target))
        main(target, cfg)
        Notice.hr_header("Done")
    elif os.path.isdir(target):
        if args.recursive:
            Notice.info("Looking for SEGY files in {} and its subdirectories".format(target))
            for target in utils.walk(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        else:
            Notice.info("Finding SEGY files in {}".format(target))
            for target in utils.listdir(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        Notice.hr_header("Done")
    else:
        Notice.fail("Not a file or directory.")
Example #38
def sgy2shp(input_dir, output_dir, convert=False):
    """
    Extracts trace location from SEGY files and saves it in a
    shape file. A shape file is generated for each SEGY file.

    Returns nothing, side effect: writes the shape files.

    Args:
        input_dir (str): Directory containing SEGY files.
        output_dir (str): Directory to save shape files.
        convert (bool): If True, convert coordinates from NAD27 to NAD83.
    """

    line_out_file = os.path.join(output_dir, "seismic_lines.shp")

    if os.path.exists(line_out_file):
        raise ShapeFileExists

    # Set up the shapefile schema.
    line_schema = {'geometry': 'LineString',
                   'properties': {'segyfile': 'str',
                                  'line': 'str'
                                  }
                   }

    with fiona.open(line_out_file, "w",
                    driver="ESRI Shapefile",
                    crs=crs.from_epsg(26920),
                    schema=line_schema) as line_out:

        for path in utils.walk(input_dir, "\\.se?gy$"):

            filebase = os.path.splitext(os.path.basename(path))[0]

            # Read in the headers.
            segy = obspy.read(path,
                              headonly=True,
                              unpack_trace_header=True)

            points = []

            point_out_file = os.path.join(output_dir, "." +
                                          filebase + '.shp')

            # Set up the shapefile schema.
            point_schema = {'geometry': 'Point',
                            'properties': {'line': 'str',
                                           'segyfile': 'str',
                                           'trace': 'int'
                                           }
                            }

            with fiona.open(point_out_file, "w",
                            driver="ESRI Shapefile",
                            crs=crs.from_epsg(26920),
                            schema=point_schema) as trace_out:

                for i, trace in enumerate(segy):

                    header = trace.stats.segy.trace_header
                    scalar = header.scalar_to_be_applied_to_all_coordinates
                    if scalar == -100:
                        gain = 0.01
                    elif scalar == -10:
                        gain = 0.1
                    else:
                        gain = 1.0

                    x = float(header.source_coordinate_x) * gain
                    y = float(header.source_coordinate_y) * gain

                    # Sanity check for geometry order of magnitude.
                    if x > 9e5 or y > 55e6:
                        if x > 9e6 or y > 55e7:
                            log.info('Found weird coords: dividing by 100')
                            x = x / 100.0
                            y = y / 100.0
                        else:
                            log.info('Found weird coords: dividing by 10')
                            x = x / 10.0
                            y = y / 10.0

                    if convert:
                        log.info("Converting from NAD27 to NAD83")
                        x, y = pp.transform(utm_nad27, utm_nad83, x, y)

                    p = Point(x, y)
                    points.append(p)
                    trace_out.write({'geometry': mapping(p),
                                     'properties': {'line': filebase,
                                                    'segyfile': path,
                                                    'trace': i}
                                     })

            # We need this to plot seismic lines on the map.
            linestring = LineString(points)
            line_out.write({'geometry': mapping(linestring),
                            'properties': {'segyfile': path,
                                           'line': filebase}
                            })
Example #39
    links = {}
    content = ''
    with open(path, 'r') as input:
        for line in input:
            # See if this line is a footnote link.
            m = re.search('   \[([0-9]+)\]: (http://[\S]+)(?: \(([^\)]+)\))?$', line)
            if m:
                # Yup, key it's number.
                num, link, desc = m.groups('')
                links[num] = (link, desc)
            else:
                # Not a link line, so add it
                content += line

    # Now replace all of the footnoted links with inline ones.
    for num, pair in links.iteritems():
        def replace_link(m):
            if pair[1] != '':
                return '[%s](%s "%s")' % (m.group(1), pair[0], pair[1])
            else:
                return '[%s](%s)' % (m.group(1), pair[0])

        pattern = '\[([^\]]+)\]\[%s\]' % (num)
        content = re.sub(pattern, replace_link, content)

    # Save the file back out
    with open(path, 'w') as output:
        output.write(content)

utils.walk('posts', fix_file)
Example #40
    if last_length > 0:
        print ("\r" + (" " * last_length) + "\r"),

    print text,
    sys.stdout.flush()
    last_length = len(text)

# clean out the output directory
utils.kill_dir('html')

# copy over the static content
shutil.copytree('static', 'html')

# load the template page
post_template = open('templates/post.html', 'r').read()

def strip_newline(line):
    return line.rstrip()

utils.walk('posts', read_post, '.md')
years = get_years(posts)
tags = get_tags(posts)

i = 1
for post in posts:
    post.output(years, tags)
    write_line('%s/%s' % (i, len(posts)))
    i += 1

write_line('Processed %s posts.' % (len(posts),))
Example #41
# Parses the titles and permalinks from markdown.
import utils
import re

def fix_file(path):
    with open(path, 'r') as input:
        content = input.read()
        # Parse the title and link
        m = re.search('\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/', content)

        # Add the title
        result = 'title = ' + m.group(1) + '\n'

        perma = utils.linkify(m.group(1))
        if perma != m.group(2):
            result = result + 'permalink = ' + m.group(2) + '\n'
            print path
            print perma
            print m.group(2)
            print '---'

        result = result + content

        # Save the file back out
        with open(path, 'w') as output:
            output.write(result)

utils.walk('posts', fix_file, '.md')
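
These site scripts all assume a walk(root, fn, ext=None) helper that applies fn to every file under root, optionally filtered by extension; a minimal sketch under that assumption:

import os

def walk(root, fn, ext=None):
    # Apply fn to every file under root whose name ends with ext (if given).
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            if ext is None or name.endswith(ext):
                fn(os.path.join(dirpath, name))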
Example #42
# Renames the post files to Jekyll-style.
import os.path

import utils
import re

#   http://journal.stuffwithstuff.com/2008/02/09/

def fix_file(path):
    with open(path, 'r') as input:
        print path

        m = re.search('(.*)\.markdown', os.path.basename(path))
        date = m.group(1)

        content = input.read()
        m = re.search('title: "(.*)"', content)
        perm = utils.linkify(m.group(1))

        # Save the file back out
        with open('new/%s-%s.md' % (date, perm), 'w') as output:
            output.write(content)

utils.ensure_dir('new')
utils.walk('_posts', fix_file, '.markdown')
Example #43
# Removes markdown titles.
import utils
import re


def fix_file(path):
    with open(path, "r") as input:
        content = input.read()
        # Parse the title and link
        content = re.sub(
            "\n+\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/\)\n+", "\n", content
        )

        # Save the file back out
        with open(path, "w") as output:
            output.write(content)


utils.walk("posts", fix_file, ".md")
Example #44
                # See if this line ends the code block.
                if line.startswith(indent):
                    content += empty_lines
                    empty_lines = ""
                    content += line[len(indent) :]
                else:
                    content += "{% endhighlight %}\n"
                    content += empty_lines
                    empty_lines = ""
                    content += line
                    indent = None
                    language = None
            else:
                content += empty_lines
                empty_lines = ""
                # See if this line starts a code block.
                m = re.search("((    )+):::(.*)\n$", line)
                if m:
                    indent = m.group(1)
                    language = m.group(3)
                    content += "{% highlight " + language + " %}\n"
                else:
                    content += line

    # Save the file back out
    with open(path, "w") as output:
        output.write(content)


utils.walk("_posts", fix_file)