def readFile(path):
    """Load a single data file.

    :param path: path to the file the user wants to load (str)
    :return: depends on the file type:
        .odt          -> (odt_data, stages)
        .omf or .ovf  -> (rawVectorData, header)
    :raises ValueError: if the path matches none of the supported
        extensions.
    """
    if ".odt" in path:
        odt_data, stages = getPlotData(path)
        return odt_data, stages

    if ".omf" in path or ".ovf" in path:
        if is_binary(path):
            # The binary parser returns one header per input file; we
            # pass exactly one file, so take the first header.
            headers, rawVectorData = MultiprocessingParse.readBinary([path])
            header = headers[0]
        else:
            # is_binary() returns a bool, so the old third branch
            # ("Can't detect encoding!") was unreachable dead code:
            # text is the only remaining case.
            rawVectorData = MultiprocessingParse.readText([path])
            header = getFileHeader(path)
        return rawVectorData, header

    raise ValueError("Invalid file! Must have .odt, .omf " +
                     "or .ovf extension!")
示例#2
0
def dirCheck(DIRPATH):
    """Recursively walk DIRPATH, run the grammar tool over every text
    file, and write the matches to a mirrored '.fix' file under FIXDIR.

    Fixes over the original: the body mixed tabs and spaces (a
    SyntaxError under Python 3), used Python 2 ``print`` statements,
    and never closed its file handles.

    :param DIRPATH: directory to scan; expected to end with '/'
        (paths are built by plain concatenation).
    """
    dirList = os.listdir(DIRPATH)
    for inFile in dirList:
        fullPath = DIRPATH + inFile
        if os.path.isdir(fullPath):
            print(fullPath + '/')
            dirCheck(fullPath + '/')
        elif is_binary(fullPath):
            # Binary files cannot be grammar-checked; skip them.
            continue
        else:
            nLine = 0
            # Mirror the source tree under FIXDIR for the reports.
            TEMPDIR = FIXDIR[:-1] + DIRPATH
            mkdirp(TEMPDIR)
            with open(fullPath, 'r') as f, \
                    open(TEMPDIR + inFile + '.fix', 'w') as fw:
                for line in f:
                    matches = tool.check(line)
                    for match in matches:
                        # Rules listed in ExceptRuleId are ignored.
                        if match.ruleId in ExceptRuleId:
                            continue
                        print(fullPath, match.ruleId)
                        data = (str(nLine) + ' | ' + str(match.fromx) + ' ' +
                                str(match.tox) + ' | ' +
                                line[match.fromx:match.tox] + ' | ' +
                                match.msg + '\n')
                        print(data)
                        fw.write(str(match) + '\n')
                        fw.write(data + '\n')
                    nLine += 1
示例#3
0
def make_file(inpath, tmpldict, outpath=None):
    """Render a single template file to *outpath* (or echo to stdout).

    :param inpath: Path of the template file to render.
    :param tmpldict: Mapping of template variables to values.
    :param outpath: Optional output path; it is itself rendered as a
        template, and if it names a directory the input's basename is
        appended (and rendered again).
    :return: True on success, False if rendering produced nothing;
        the binary-copy path returns None (kept for backward
        compatibility with existing callers).
    """
    inpath = op.abspath(inpath)
    if outpath:
        outpath = render_str(outpath, tmpldict)
        if op.isdir(outpath):
            outpath = op.join(outpath, op.basename(inpath))
            outpath = render_str(outpath, tmpldict)
    if is_binary(inpath):
        # Binary files are copied verbatim; there is nothing to render.
        qprompt.status("Copying `%s`..." % (outpath), fsys.copy,
                       [inpath, outpath])
        return
    text = render_file(inpath, tmpldict)
    if text is None:  # bug fix: was `text == None`
        return False

    # Handle rendered output.
    if outpath:
        outpath = op.abspath(outpath)
        if inpath == outpath:
            qprompt.fatal("Output cannot overwrite input template!")
        fsys.makedirs(op.dirname(outpath))
        with io.open(outpath, "w", encoding="utf-8") as f:
            qprompt.status("Writing `%s`..." % (outpath), f.write, [text])
    else:
        qprompt.echo(text)
    return True
示例#4
0
def searchlist(flist: Union[List[Path], Iterable], txt: str,
               exclude: List[str], verbose: bool):
    """Search each file in *flist* for *txt*, printing the matches.

    :param flist: paths to search.
    :param txt: text to look for.
    :param exclude: path components whose presence skips the file.
    :param verbose: when True, also print the matching lines.

    NOTE(review): matches are collected in ``mat`` but never returned —
    preserved as-is since callers may rely on the print side effects
    only.
    """
    mat = []
    exc = set(exclude)

    for f in flist:
        # Path.parts is portable; the old str(...).split('/') failed to
        # split Windows paths (backslash separators).
        if exc.intersection(f.resolve().parts):
            continue
        # note that searchfile() does NOT work for PDF even with text
        # inside...but Grep does. Hmm..
        if f.is_file() and f.stat().st_size < MAXSIZE:
            matchinglines: List[str] = []

            if not is_binary(str(f)):
                here, matchinglines = searchfile(f, txt)
            elif f.suffix == '.pdf':
                here = searchbinary(f, txt)
            else:
                logging.info(f'skipped {f}')
                continue

            if here:
                mat.append(f)
                if verbose:
                    print(MAGENTA + str(f))
                    print(BLACK + '\n'.join(matchinglines))
                else:
                    print(f)
示例#5
0
    def _is_executable(self, file_name):
        """Check whether a file is executable, or can be executed by the
        interpreter for the program's selected language.

        Args:
            file_name (str): The name of the file to be checked

        Returns:
            bool: True if the file can be executed
        """
        ext = path.splitext(file_name)[-1]

        # Executable bit set, or a Windows executable: runnable for any
        # language.
        if os.access(file_name, os.X_OK) or ext == '.exe':
            return True

        # Extensions the configured interpreter can run directly.
        runnable_exts = {
            'java': ['.class', '.jar'],
            'python': ['.py'],
            'bash': ['.sh', '.bash'],
            'shell': ['.sh', '.bash'],
        }
        if ext in runnable_exts.get(self.language, []):
            return True

        # Compiled C/C++ binaries: no extension, but binary content.
        return (self.language in ['c', 'cpp', 'c++']
                and ext == ''
                and is_binary(file_name))
示例#6
0
文件: log.py 项目: yaoy123/recipy
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a file
    etc) with the filename and some sort of information about the source.

    Note: the source parameter is currently not stored in the database.
    """
    if isinstance(filename, list):
        # A multi-file output: log each file, then stop.
        for f in filename:
            log_output(f, source)
        return
    elif not isinstance(filename, six.string_types):
        # File-like objects carry their path in .name.  Narrowed from a
        # bare `except:` so unrelated errors are no longer swallowed.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)

    version = get_version(source)
    db = open_or_create_db()

    if option_set('data', 'file_diff_outputs') and os.path.isfile(filename) \
       and not is_binary(filename):
        # Snapshot the current file contents so a diff can be computed
        # when the script exits.
        tf = tempfile.NamedTemporaryFile(delete=False)
        shutil.copy2(filename, tf.name)
        add_file_diff_to_db(filename, tf.name, db)

    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))
    #Update object in DB
    # data hash will be hashed at script exit, if enabled
    db.update(append("outputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", version, no_duplicates=True), eids=[RUN_ID])
    db.close()
示例#7
0
    def process_state(self, state):
        """Attach the contents of a local ``file://`` conffile to *state*.

        Returns *state* unchanged for non-file URLs, entries flagged
        ``assume_nochange``, oversized files, excluded paths and
        binaries.  Read/decode errors are recorded in
        ``state['content_error']`` instead of raising.
        """
        if str(state['url']).startswith("file://"):
            path = state['url'][7:]
        else:
            return state

        # .get() replaces the redundant `in state.keys() and ...` pair;
        # the `== True` comparison is preserved deliberately.
        if state.get('assume_nochange') == True:
            return state

        if state.get('size') is not None:
            if int(state['size']) > self.size_limit:
                self.app.log.debug("%s is above size limit - skipping" % (path))
                return state
        # filter out excludes
        for pat in self.excludepats:
            if pat.match(path):
                self.app.log.debug("%s excluded by %s" % (path, pat))
                return state
        try:
            if is_binary(path):
                self.app.log.debug("%s seems to be binary - skipping" % (path))
                return state
            with open(path, mode='rb') as file:
                state['content'] = file.read().decode('utf-8', 'replace')
        except Exception as e:
            # Narrowed from a bare `except:` (which also caught
            # KeyboardInterrupt/SystemExit); the error is surfaced to
            # the caller via the state dict.
            state['content_error'] = e

        return state
示例#8
0
def send_to_syslog(filename, sock, zflag):
    """Wraps individual syslog functions. Returns linecount"""

    linecount = 0
    logging.debug("Function: send_to_syslog %s, send_zip=%s", filename, zflag)

    base = os.path.basename(filename)

    # Empty files are never sent.
    if os.stat(filename).st_size == 0:
        logging.info("Skipped empty file: %s", base)
        return 0

    # Plain text goes straight out.
    if not is_binary(filename):
        logging.info("Sending file: %s", base)
        linecount = send_text_to_syslog(filename, sock)
        logging.info("Sent    file: %s: Lines: %s", base, linecount)
        return linecount

    # Binary file: only gzip archives are supported, and only with zflag.
    if not zflag:
        logging.debug("Skipped binary file: %s", filename)
        return 0

    lowered = filename.lower()
    if lowered.endswith(".zip"):
        logging.info("Skipped zip: %s", base)
    elif lowered.endswith(".gz"):
        logging.info("Sending file: %s", base)
        linecount = send_gzip_to_syslog(filename, sock)
        logging.info("Sent    file: %s: Lines: %s", base, linecount)

    return linecount
示例#9
0
def get_file_mode(file_path: str, mode: str) -> str:
    """Map a *mode* keyword to an ``open()`` mode string.

    'binary' always yields 'rb', 'text' always yields 'r', and 'auto'
    inspects *file_path* to choose between the two.

    Example:

        >>> get_file_mode("doesn't matter", 'binary')
        'rb'
        >>> get_file_mode("doesn't matter", 'text')
        'r'

    Args:
        file_path: File to load.
        mode: One of ['binary', 'text', 'auto'].

    Returns:
        One of ['rb', 'r']

    """
    if mode == 'auto':
        # Only 'auto' needs to look at the actual file contents.
        return 'rb' if is_binary(file_path) else 'r'

    fixed_modes = {'binary': 'rb', 'text': 'r'}
    if mode in fixed_modes:
        return fixed_modes[mode]

    raise ValueError(
        "Argument 'mode' is expected to be one of: auto, binary, text")
示例#10
0
    def add(self, path):
        """
            Make content available for download
            Add entry to table with Status as UPLOAD

            Returns
                TRUE on success
                FALSE on failure
        """
        if (not os.path.exists(path)) or (os.path.isfile(path) and not is_binary(path)):
            # Bug fix: the old message had no {} placeholder, so the
            # .format(path) argument was silently dropped.
            logger.warning(
                "Path {}: does not exist or is not a binary file".format(path))
            return False
        elif (os.path.isdir(path)):
            # Share every regular file directly inside the directory
            # (one level; subdirectories are not descended into).
            logger.info("Sharing all files of folder {}".format(path))
            for file in os.listdir(path):
                file = path+"/"+file
                if os.path.isfile(file):
                    self.add(file)
            return True
        else:
            logger.info("Request to Share file: {}".format(path))
            # Basename without its extension.
            filename = os.path.splitext(path)[0].split("/")[-1]
            file_stat = os.stat(path)
            size = file_stat.st_size
            cSum = self.checksum_large(path)
            parentId = "0"
            randId = 0
            status = constants.FS_UPLOADED
            replication = None
            self.add_entry(constants.DB_TABLE_FILE, filename, path,
                           size, cSum, parentId, randId, status, replication)
            logger.info("File Share Done for {}".format(path))
            return True
示例#11
0
    def getTokensFromProject(self, projectName):
        """Collect every '#{...}' token found in text files under
        *projectName*, appending new tokens to self.tokens and the
        scanned paths to self.tokenized_files.
        """
        # Hoisted out of the per-file loop: re.compile was previously
        # re-run for every file.
        token_re = re.compile(r'#{[^}]+}')
        for currentfolder, subfolders, files in os.walk(projectName):
            for fname in files:
                fpath = os.path.join(currentfolder, fname)
                if is_binary(fpath):
                    continue
                with open(fpath, 'r') as tosearch:
                    contents = tosearch.read()

                matches = token_re.findall(contents)
                print(f'{matches} in file {fpath}')

                for match in matches:
                    if match not in self.tokens:
                        self.tokens.append(match)

                if fpath not in self.tokenized_files:
                    self.tokenized_files.append(fpath)

        print(self.tokens)
示例#12
0
文件: __init__.py 项目: vra/dompare
def diff_two_directories(logger, dir1, dir2, tmp_file, exclude):
    """Recursively diff same-named entries of *dir1* and *dir2*.

    :param logger: logger for debug output.
    :param dir1: first directory (drives the traversal).
    :param dir2: second directory.
    :param tmp_file: file the individual diffs are written to.
    :param exclude: names to skip, or None; '.git' is always skipped.
    """
    paths = os.listdir(dir1)

    # Build a NEW list instead of `exclude += [...]`: the old in-place
    # extend mutated the caller's list and appended another '.git' at
    # every recursion level.
    if exclude is not None:
        exclude = exclude + ['.git']
    else:
        exclude = ['.git']

    for ex in exclude:
        if ex in paths:
            logger.debug('Ignore {}'.format(ex))
            paths.remove(ex)

    for path in paths:
        path1 = os.path.join(dir1, path)
        path2 = os.path.join(dir2, path)

        if os.path.isdir(path1):
            logger.debug('Processing dir {}'.format(path1))
            diff_two_directories(logger, path1, path2, tmp_file, exclude)

        elif is_binary(path1):
            logger.debug('Ignore binary file {}'.format(path1))
            continue

        elif not os.path.exists(os.path.join(dir2, path)):
            logger.debug(
                'Ignore single file (no same name file in dir2) {}'.format(
                    path1))
            continue

        else:
            logger.debug('Compare {} and {}'.format(path1, path2))
            diff_two_files(os.path.join(dir1, path), os.path.join(dir2, path),
                           tmp_file)
示例#13
0
文件: main.py 项目: vladaindjic/siem
def run_linux_agents(file_agents):
    """Dispatch each monitored Linux log file to the matching agent.

    Plain text files (that are not one of the special binary login
    logs) go to the generic text parser; the binary login/records
    files get their dedicated agent.
    """
    special_logs = ('btmp', 'tallylog', 'wtmp', 'utmp', 'lastlog')
    for file_path, lin_agent in file_agents.items():
        if not is_file_linux(file_path):
            continue
        if not is_binary(file_path) and not any(
                name in file_path for name in special_logs):
            # It is a plain text log: use the generic text parser.
            lin_agent.run()
        elif 'lastlog' in file_path:
            LastlogAgent(lin_agent).run()
        elif 'faillog' in file_path:
            FailLogAgent(lin_agent).run()
        elif 'tallylog' in file_path:
            TallyLogAgent(lin_agent).run()
        else:
            # utmp / wtmp / btmp share one agent, parameterised by type.
            record_type = 'wtmp'
            if 'utmp' in file_path:
                record_type = 'utmp'
            elif 'btmp' in file_path:
                record_type = 'btmp'
            UWBTmpAgent(lin_agent, record_type).run()
示例#14
0
    def predict(self, blob):
        """Run the (lazily loaded) classifier over *blob* plus the
        bundled reference file and return the predictions.

        The model is cached in the module-level ``classifier_model``
        global so it is only deserialised once per process.
        """
        global classifier_model
        if not classifier_model:
            filename = '../resources/finalized_model.joblib.pkl'
            classifier_model = joblib.load(filename)
            log.info('Model loaded')

        time = datetime.datetime.now()
        log.info("Starting prediction")

        Examples = []
        Files = [blob, '../resources/finalized_model.joblib.pkl']

        for file_ in Files:
            # Inverted from the old `if is_binary(...): pass / else:`
            # pattern — only text files are fed to the classifier.
            if not is_binary(file_):
                with open(file_, "r") as src_file_:
                    Examples.append(src_file_.read())

        predict_examples = classifier_model.predict(Examples)

        log.info("Finished with prediction within {} ".format(
            datetime.datetime.now() - time))
        log.info("predict_examples {} ".format(predict_examples))

        return predict_examples
示例#15
0
文件: utils.py 项目: yustoris/dvc
def file_md5(fname):
    """ get the (md5 hexdigest, md5 digest) of a file """
    if not os.path.exists(fname):
        return (None, None)

    digest = hashlib.md5()
    # When dealing with large collections of binary files, the
    # is_binary call becomes a bottleneck; checking the extension
    # first avoids it whenever possible.
    extension = fname.split('.')[-1]
    if extension in BINARY_FILE_EXTENSIONS:
        binary = True
    else:
        binary = is_binary(fname)

    with open(fname, 'rb') as fobj:
        while True:
            data = fobj.read(LOCAL_CHUNK_SIZE)
            if not data:
                break
            # Text files get their line endings normalised before
            # hashing; binary files are hashed verbatim.
            digest.update(data if binary else dos2unix(data))

    return (digest.hexdigest(), digest.digest())
示例#16
0
def licence_check(licence_ext, licence_ignore, project, project_dir):
    """ Perform basic checks for the presence of licence strings """
    # Note: Hardcoded use of 'copyright' & 'spdx' is the result of a
    # decision made at 2017 plugfest to limit searches to just these
    # two strings.  Hoisted out of the loop: it is invariant per file.
    patterns = [
        'copyright', 'spdx',
        'http://creativecommons.org/licenses/by/4.0'
    ]
    for root, dirs, files in os.walk(project_dir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for file in files:
            if file.endswith(tuple(licence_ext)) \
                    and file not in licence_ignore:
                full_path = os.path.join(root, file)
                if is_binary(full_path):
                    continue
                # 'with' guarantees the handle is closed; the old
                # `fo = open(...)` leaked one descriptor per file.
                with open(full_path, 'r') as fo:
                    content = fo.read()
                if any(i in content.lower() for i in patterns):
                    logger.info('Licence string present: %s', full_path)
                else:
                    logger.error('Licence header missing: %s', full_path)
                    with open(reports_dir + "licence-" + project + ".log",
                              "a") \
                            as gate_report:
                        gate_report.write(
                            'Licence header missing: {0}\n'.format(
                                full_path))
示例#17
0
    def process_state(self, state):
        """Attach the contents of a local ``file://`` conffile to *state*.

        Returns *state* unchanged for non-file URLs, entries flagged
        ``assume_nochange``, oversized files, excluded paths and
        binaries.  Read/decode errors are recorded in
        ``state['content_error']`` instead of raising.
        """
        if str(state['url']).startswith("file://"):
            path = state['url'][7:]
        else:
            return state

        # .get() replaces the redundant `in state.keys() and ...` pair;
        # the `== True` comparison is preserved deliberately.
        if state.get('assume_nochange') == True:
            return state

        if state.get('size') is not None:
            if int(state['size']) > self.size_limit:
                self.app.log.debug("%s is above size limit - skipping" %
                                   (path))
                return state
        # filter out excludes
        for pat in self.excludepats:
            if pat.match(path):
                self.app.log.debug("%s excluded by %s" % (path, pat))
                return state
        try:
            if is_binary(path):
                self.app.log.debug("%s seems to be binary - skipping" % (path))
                return state
            with open(path, mode='rb') as file:
                state['content'] = file.read().decode('utf-8', 'replace')
        except Exception as e:
            # Narrowed from a bare `except:` (which also caught
            # KeyboardInterrupt/SystemExit); the error is surfaced to
            # the caller via the state dict.
            state['content_error'] = e

        return state
示例#18
0
def grep(dir, str, recursive=False, ignorecase=False):
    """Search *dir* for *str* in both file names and file contents.

    Matches are reported via print; directories are descended into only
    when *recursive* is set.  Always returns 0.

    (The parameter names shadow the `dir`/`str` builtins; they are kept
    because they are part of the public call signature.)
    """
    for entry in os.listdir(dir):
        name_to_check = entry
        if ignorecase:
            name_to_check = entry.lower()
            str = str.lower()

        full_path = os.path.join(dir, entry)
        if str in name_to_check:
            print("Filename matches template:", full_path)

        if os.path.isdir(full_path):
            if recursive:
                grep(full_path, str, recursive, ignorecase)
            continue

        # Content search — skipped for binaries when binaryornot is
        # available; otherwise every file is treated as text.
        file_is_binary = is_binary(full_path) if binaryornot_installed else False
        if file_is_binary:
            continue

        with open(full_path, 'r') as handle:
            for line_num, line in enumerate(handle, start=1):
                line_to_compare = line.lower() if ignorecase else line
                if str in line_to_compare:
                    print("Line number {num} in file {path} matches template: {line}".format(num=line_num, path=full_path, line=line.strip()))

    return 0
def download_item(item):
    """Download one remote file and, when it is valid audio, convert it.

    *item* is an (url, output_dir, overwrite) triple.  A download that
    turns out NOT to be binary is assumed to be an error page and is
    deleted.  Returns True only for such bad downloads, False otherwise.

    NOTE(review): commands are built by string concatenation and run
    through os.system — unsafe if *url* is untrusted; kept as-is to
    preserve behaviour (shell redirection is relied upon).
    """
    url, output_dir, overwrite = item
    output_filepath = os.path.join(output_dir, os.path.basename(url))
    output_filepath_wav = os.path.join(output_dir,
                                       os.path.basename(url)[0:-4] + '.wav')

    extra = ' >/dev/null 2>&1'

    if os.path.exists(output_filepath) and not overwrite:
        return False

    # Download the file.
    ret = os.system('wget -O ' + output_filepath + ' ' + url)

    ib = is_binary(output_filepath)
    if not ib:
        # Not binary => probably an HTML error page; remove it.
        print("BAD", output_filepath)
        ret = os.system('rm ' + output_filepath + extra)
    elif '.mp3' in output_filepath:
        # Convert to wav using sox, then drop the original mp3.
        ret = os.system('sox -v 0.99 ' + output_filepath + ' ' +
                        output_filepath_wav + extra)
        ret = os.system('rm ' + output_filepath + extra)

    return not ib
示例#20
0
文件: utils.py 项目: rizplate/dvc
def file_md5(fname):
    """ get the (md5 hexdigest, md5 digest) of a file """
    if not os.path.exists(fname):
        return (None, None)

    digest = hashlib.md5()
    binary = is_binary(fname)
    mode = "rb" if binary else "r"

    with open(fname, mode) as fobj:
        while True:
            data = fobj.read(LOCAL_CHUNK_SIZE)
            if not data:
                break

            if binary:
                chunk = data
            else:
                # Python 2 text mode yields bytes; normalise to unicode
                # before line-ending conversion, then re-encode.
                if sys.version_info[0] == 2:
                    data = data.decode('utf-8')
                chunk = dos2unix(data).encode('utf-8')

            digest.update(chunk)

    return (digest.hexdigest(), digest.digest())
示例#21
0
def get_contents(side):
    """Return the plain-text contents of the file on one side of the
    current diff, or an error response.

    :param side: 'a' or 'b', selecting which path of the diff entry.
    """
    if side not in ('a', 'b'):
        return error('invalid side', 'Side must be "a" or "b", got %s' % side)

    # TODO: switch to index? might be simpler
    path = request.form.get('path', '')
    if not path:
        return error('incomplete', 'Incomplete request (need path)')

    idx = diff.find_diff_index(DIFF, side, path)
    if idx is None:
        return error('not found', 'Invalid path on side %s: %s' % (side, path))

    d = DIFF[idx]
    abs_path = d.a_path if side == 'a' else d.b_path

    try:
        if is_binary(abs_path):
            size = os.path.getsize(abs_path)
            contents = "Binary file (%d bytes)" % size
        else:
            # 'with' closes the handle; the old bare open().read()
            # leaked a file descriptor per request.
            with open(abs_path) as fh:
                contents = fh.read()
        return Response(contents, mimetype='text/plain')
    except Exception:
        return error('read-error', 'Unable to read %s' % abs_path)
def check_paths(paths):
    """Assert that no text file in *paths* still contains an
    unreplaced template variable (anything matched by RE_OBJ).
    """
    for path in paths:
        if is_binary(path):
            continue
        # 'with' ensures the handle is closed; iterating a bare open()
        # left the file descriptor to the garbage collector.
        with open(path, 'r', encoding="latin-1") as fh:
            for line in fh:
                match = RE_OBJ.search(line)
                msg = 'variable not replaced in {}'
                assert match is None, msg.format(path)
示例#23
0
文件: finja.py 项目: schtibe/finja
def read_index(db, file_, file_path, update = False):
    """Tokenize *file_path* and rewrite its entries in the search index.

    :param db: (connection, token_dict) pair.
    :param file_: database id of the file row.
    :param file_path: path of the file on disk.
    :param update: when True, suppress the "is binary" message.
    :return: the encoding that was ultimately used to parse the file
        (returned even when the file was skipped as binary).
    """
    global _index_count
    con          = db[0]
    token_dict   = db[1]
    encoding     = "UTF-8"
    if is_binary(file_path):
        if not update:
            print("%s: is binary, skipping" % (file_path,))
    else:
        # Batch mode: stop the whole process once the quota is reached.
        if _args.batch > 0:
            _index_count += 1  # noqa
            if _index_count > _args.batch:
                con.close()
                sys.exit(0)
        try:
            # First attempt: parse assuming UTF-8.
            inserts      = set()
            insert_count = parse_file(db, file_, file_path, inserts, encoding)
        except UnicodeDecodeError as e:
            # Fall back to charset detection and retry once.
            try:
                with open(file_path, "rb") as f:
                    detector = UniversalDetector()
                    for line in f.readlines():
                        detector.feed(line)
                        if detector.done:
                            break
                    detector.close()
                    encoding = detector.result['encoding']
                if not encoding:
                    # Detection failed entirely: re-raise the original error.
                    raise e
                inserts      = set()
                insert_count = parse_file(
                    db, file_, file_path, inserts, encoding
                )
            except UnicodeDecodeError:
                # Even the detected encoding failed: give up on this file.
                print("%s: decoding failed %s" % (
                    file_path,
                    encoding
                ))
                inserts.clear()
                return encoding
        # Add one (token, file, -1) marker row per distinct token.
        tokens = set([x[0] for x in inserts])
        for token in tokens:
            inserts.add((token, file_, -1))
        # Replace the file's old index rows in a single transaction.
        with con:
            new = token_dict.commit()
            con.execute(_clear_existing_index, (file_,))
            con.executemany(_insert_index, inserts)
        unique_inserts = len(inserts)
        print("%s: indexed %s/%s (%.3f) new: %s %s" % (
            file_path,
            unique_inserts,
            insert_count,
            # Epsilon avoids division by zero for empty files.
            float(unique_inserts) / (insert_count + 0.0000000001),
            new,
            encoding
        ))
        clear_cache(db)
    return encoding
示例#24
0
def read_index(db, file_, file_path, update = False):
    """Tokenize *file_path* and rewrite its entries in the search index.

    :param db: (connection, token_dict) pair.
    :param file_: database id of the file row.
    :param file_path: path of the file on disk.
    :param update: when True, suppress the "is binary" message.
    :return: the encoding that was ultimately used to parse the file
        (returned even when the file was skipped as binary).
    """
    global _index_count
    con          = db[0]
    token_dict   = db[1]
    encoding     = "UTF-8"
    if is_binary(file_path):
        if not update:
            print("%s: is binary, skipping" % (file_path,))
    else:
        # Batch mode: stop the whole process once the quota is reached.
        if _args.batch > 0:
            _index_count += 1  # noqa
            if _index_count > _args.batch:
                con.close()
                sys.exit(0)
        try:
            # First attempt: parse assuming UTF-8.
            inserts      = set()
            insert_count = parse_file(db, file_, file_path, inserts, encoding)
        except UnicodeDecodeError as e:
            # Fall back to charset detection and retry once.
            try:
                with open(file_path, "rb") as f:
                    detector = UniversalDetector()
                    for line in f.readlines():
                        detector.feed(line)
                        if detector.done:
                            break
                    detector.close()
                    encoding = detector.result['encoding']
                if not encoding:
                    # Detection failed entirely: re-raise the original error.
                    raise e
                inserts      = set()
                insert_count = parse_file(
                    db, file_, file_path, inserts, encoding
                )
            except UnicodeDecodeError:
                # Even the detected encoding failed: give up on this file.
                print("%s: decoding failed %s" % (
                    file_path,
                    encoding
                ))
                inserts.clear()
                return encoding
        # Add one (token, file, -1) marker row per distinct token.
        tokens = set([x[0] for x in inserts])
        for token in tokens:
            inserts.add((token, file_, -1))
        # Replace the file's old index rows in a single transaction.
        with con:
            new = token_dict.commit()
            con.execute(_clear_existing_index, (file_,))
            con.executemany(_insert_index, inserts)
        unique_inserts = len(inserts)
        print("%s: indexed %s/%s (%.3f) new: %s %s" % (
            file_path,
            unique_inserts,
            insert_count,
            # Epsilon avoids division by zero for empty files.
            float(unique_inserts) / (insert_count + 0.0000000001),
            new,
            encoding
        ))
        clear_cache(db)
    return encoding
示例#25
0
def run(args, out=sys.stdout) -> int:
    """Add headers to files.

    Validates the CLI arguments, resolves the optional Jinja template,
    builds the SPDX info (licence expressions plus copyright lines) and
    adds a header to every given path.  Binary files — and all files
    when --explicit-license is set — get a '<path>.license' sidecar
    file instead of an in-file header.

    Returns 0 when every header was added cleanly, 1 otherwise.
    """
    if not any((args.copyright, args.license)):
        args.parser.error(_("option --copyright or --license is required"))

    if args.exclude_year and args.year:
        args.parser.error(
            _("option --exclude-year and --year are mutually exclusive"))

    paths = [_determine_license_path(path) for path in args.path]

    # First loop to verify before proceeding
    if args.style is None:
        _verify_paths_supported(paths, args.parser)

    project = create_project()
    template = None
    commented = False
    if args.template:
        try:
            template = _find_template(project, args.template)
        except TemplateNotFound:
            args.parser.error(
                _("template {template} could not be found").format(
                    template=args.template))

        # Templates named '*.commented.*' already carry comment markers.
        if ".commented" in Path(template.name).suffixes:
            commented = True

    # Year for the copyright lines: explicit --year wins, otherwise the
    # current year; None when --exclude-year suppresses it entirely.
    year = None
    if not args.exclude_year:
        if args.year:
            year = args.year
        else:
            year = datetime.date.today().year

    expressions = set(args.license) if args.license is not None else set()
    copyright_lines = (set(
        make_copyright_line(x, year=year)
        for x in args.copyright) if args.copyright is not None else set())

    spdx_info = SpdxInfo(expressions, copyright_lines)

    result = 0
    for path in paths:
        binary = is_binary(str(path))
        if binary or args.explicit_license:
            # Binary files cannot carry an in-file header; write the
            # header into a '<path>.license' sidecar instead.
            new_path = f"{path}.license"
            if binary:
                _LOGGER.info(
                    _("'{path}' is a binary, therefore using '{new_path}' "
                      "for the header").format(path=path, new_path=new_path))
            path = Path(new_path)
            path.touch()
        result += _add_header_to_file(path, spdx_info, template, commented,
                                      args.style, out)

    # Any non-zero per-file result collapses to exit code 1.
    return min(result, 1)
def main(args=None):
    """Main program entry point,

    :param args: list of string command line arguments
    :type args: list[str]
    :return: program exit code or error string
    :rtype: int|str

    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('-e', '--exclude', metavar='PATH', action='append',
                    help="Exclude given path or filename (can be given more than once)")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="Report which files are processed and updated to stdout")
    ap.add_argument('files', nargs='*',
                    help="Only process given files (default: all versioned files)")

    args = ap.parse_args(args)

    def report(msg):
        # Honour --verbose for progress output.
        if args.verbose:
            print(msg)

    # Pair every versioned file with its last commit date.
    git_files = []
    for path in check_output(['git', 'ls-files'] + args.files).decode().splitlines():
        date = parse(check_output(
            ['git', '--no-pager', 'log', '-1', '--date=iso', '--pretty=format:%ai', '--', path]
        ).decode())
        git_files.append((date, path))

    for date, path in filter_excluded(git_files, args.exclude or [], itemgetter(1)):
        if is_binary(path):
            report("Ignoring binary file '%s'." % path)
            continue

        repl_func = partial(replace_year, date.year)
        try:
            with open(path) as fp:
                file_contents = fp.read()
        except OSError as exc:
            print("Could not read file '%s': %s" % (path, exc), file=sys.stderr)
            # Bug fix: without this `continue` the loop fell through and
            # searched a stale (or, on the first file, undefined)
            # file_contents from the previous iteration.
            continue

        match = COPYRIGHT_PTN.search(file_contents)
        if match:
            new_contents, num_subst = COPYRIGHT_PTN.subn(repl_func, file_contents)

            if num_subst > 0 and file_contents.strip() != new_contents.strip():
                report("Updating copyright year in '%s' to %s." % (path, date.year))

                try:
                    with open(path, 'w') as fp:
                        fp.write(new_contents)
                except OSError as exc:
                    print("Could not update file '%s': %s" % (path, exc), file=sys.stderr)
            else:
                report("Copyright statements(s) in '%s' already up to date." % path)
        else:
            report("No copyright statement found in '%s'." % path)
示例#27
0
def generate_file(project_dir, infile, context, env):
    """
    Generate one output file from a template input file.

    The name of *infile* is itself rendered as a Jinja2 template to obtain
    the output path. Binary inputs are copied verbatim; text inputs are
    rendered through *env* and written out with a trailing newline.

    Precondition: when calling `generate_file()`, the root template dir
    must be the current working directory (`utils.work_in()` is the
    recommended way to perform this directory change).

    :param project_dir: Absolute path to the resulting generated project.
    :param infile: Input file to generate the file from. Relative to the
        root template dir.
    :param context: Dict for populating the cookiecutter's variables.
    :param env: Jinja2 template execution environment.
    """
    logging.debug("Generating file {0}".format(infile))

    # The file name itself may contain template variables.
    rendered_path = Template(infile).render(**context)
    outfile = os.path.join(project_dir, rendered_path)
    logging.debug("outfile is {0}".format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Binary payloads are copied untouched.
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
    else:
        # Jinja2 expects '/'-separated template names, even on Windows.
        template_name = infile.replace(os.path.sep, '/')

        try:
            template = env.get_template(template_name)
        except TemplateSyntaxError as exception:
            # Keep the exception untranslated: it then reports the verbose
            # location of the syntax error.
            exception.translated = False
            raise
        contents = template.render(**context)

        logging.debug("Writing {0}".format(outfile))

        with unicode_open(outfile, 'w') as fh:
            fh.write(contents)
            fh.write('\n')

    # The output file inherits the input file's permission bits.
    shutil.copymode(infile, outfile)
    def readFolder(directory, multipleFileHeaders=False):
        """
        Dump a process-ready data set from a results directory.

        Returns the raw numpy array of vectors, the file header and the
        odt/txt data needed for 2d plotting.

        :param directory: folder containing .omf/.ovf data files and
            optionally a single .odt or .txt plot file.
        :param multipleFileHeaders: accepted but not used in this body —
            TODO confirm intended behaviour.
        :return: tuple (rawVectorData, header, plot_data, stages,
            trigger_list); plot_data and trigger_list may be None.
        """

        files_in_directory, ext = MultiprocessingParse.guess_file_type(
            directory)
        # NOTE(review): glob already returns paths prefixed with
        # `directory`; joining `directory` again below is only a no-op when
        # those paths are absolute — verify behaviour for relative inputs.
        ext_files = glob.glob(os.path.join(directory, '*' + ext[0]))
        test_file = os.path.join(directory, ext_files[0])

        # One stage per data file, unless a trigger list overrides it below.
        stages = len(ext_files)
        plot_file = glob.glob(os.path.join(directory, ext[1]))
        # look for .odt  or .txt in current directory
        if len(plot_file) > 1:
            raise ValueError("plot file extension conflict (too many)")
            #TODO error window
        elif not plot_file or plot_file is None:
            plot_data = None
            plot_file = None

        # NOTE: this should recognize both .omf and .ovf files
        trigger_list = None
        if plot_file is not None:
            # Stage count declared by the plot file vs. number of data files.
            plot_data, stages0 = getPlotData(plot_file[0])
            print(stages0, stages)
            if stages0 != stages:
                if stages0 > stages:
                    # More plot stages than data files: build a trigger list
                    # mapping data files onto plot stages.
                    trigger_list = MultiprocessingParse.\
                                        compose_trigger_list(ext_files,
                                                                    plot_data)
                    stages = len(trigger_list)
                    print(trigger_list)
                    print("TRIGGER LIST : {}, {}".format(
                        stages, len(trigger_list)))
                elif stages0 < stages:
                    # NOTE(review): message typo — "that" should read "than".
                    raise ValueError("Odt cannot have fewer stages that files")
        else:
            plot_data = None

        # Text and binary data files take different parsing paths.
        if not is_binary(test_file):
            rawVectorData = MultiprocessingParse.readText(files_in_directory)
            file_for_header = glob.glob(os.path.join(directory, '*' + ext[0]))
            # virtually any will do
            if not file_for_header:
                raise ValueError("no .omf  or .ovf file has been found")
            header = getFileHeader(file_for_header[0])
        else:
            headers, rawVectorData = MultiprocessingParse.readBinary(
                files_in_directory)
            # Only the first header is kept — presumably all files share
            # the same header; confirm.
            header = headers[0]
            if not header:
                raise ValueError("no .omf or .ovf file has been found")
        return rawVectorData, header, plot_data, stages, trigger_list
def generate_file(project_dir, infile, context, env):
    """
    Generate one output file from a template input file.

    The filename of *infile* is rendered to obtain the output path.
    Binary inputs are copied verbatim; text inputs are rendered through
    *env* and written as UTF-8.

    Precondition: when calling `generate_file()`, the root template dir
    must be the current working directory (`utils.work_in()` is the
    recommended way to perform this directory change).

    :param project_dir: Absolute path to the resulting generated project.
    :param infile: Input file to generate the file from. Relative to the
        root template dir.
    :param context: Dict for populating the cookiecutter's variables.
    :param env: Jinja2 template execution environment.
    """
    logging.debug('Generating file {0}'.format(infile))

    # The filename itself may contain template variables.
    outfile = os.path.join(project_dir, Template(infile).render(**context))
    logging.debug('outfile is {0}'.format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Binary payloads are copied untouched, permissions included.
        logging.debug('Copying binary {0} to {1} without rendering'.format(
            infile, outfile))
        shutil.copyfile(infile, outfile)
        shutil.copymode(infile, outfile)
        return

    # Jinja2 expects '/'-separated template names, even on Windows.
    template_name = infile.replace(os.path.sep, '/')

    try:
        tmpl = env.get_template(template_name)
    except TemplateSyntaxError as exception:
        # Keep the exception untranslated: it then reports the verbose
        # location of the syntax error.
        exception.translated = False
        raise
    contents = tmpl.render(**context)

    logging.debug('Writing {0}'.format(outfile))

    with io.open(outfile, 'w', encoding='utf-8') as fh:
        fh.write(contents)

    # The output file inherits the input file's permission bits.
    shutil.copymode(infile, outfile)
示例#30
0
def _verify_paths_comment_style(paths: List[Path], parser: ArgumentParser):
    """Abort via *parser* if a non-binary path has no known comment style."""
    unrecognised = (
        path for path in paths
        if _get_comment_style(path) is None and not is_binary(str(path))
    )
    for path in unrecognised:
        # TODO: This check is duplicated.
        parser.error(
            _("'{path}' does not have a recognised file extension,"
              " please use --style, --explicit-license or"
              " --skip-unrecognised").format(path=path))
示例#31
0
文件: rename.py 项目: dkrikun/rename
def process_file(src, dest, word_option, path,  # pylint: disable=R0913
                 diff, text_only):
    """Rename occurrences of *src* to *dest* in a file and in its path.

    :param src: pattern to replace.
    :param dest: replacement text.
    :param word_option: whole-word matching flag forwarded to the edit
        helpers.
    :param path: file to process.
    :param diff: if true, print a unified diff instead of writing changes.
    :param text_only: if true, do not substitute in the filepath itself.
    """
    # Binary files are never edited.
    if is_binary(path):
        return

    # if --text-only requested, do not perform substitutions in filepath
    if not text_only:
        new_path = edit_line(src, dest, path, word_option)
    else:
        new_path = path

    try:
        with io.open(path, 'r', encoding='utf-8') as in_file:
            in_lines = in_file.readlines()
    except IOError as e:
        # Bug fix: the original '{1}'-only format string silently dropped
        # the path argument; logging.warn is a deprecated alias.
        logging.warning('could not read file {0}, error message: {1}'
                        .format(path, e))
        return
    except UnicodeDecodeError as e:
        logging.debug('could not read file {0}, error message: {1}'
                      .format(path, e))
        return

    # perform substitutions in file contents
    out_lines = list(edit_text(src, dest, in_lines, word_option))

    # only output diff to stdout, do not write anything to file (if
    # requested by --diff)
    if diff:
        diffs = difflib.unified_diff(in_lines, out_lines,
                                     fromfile=path, tofile=new_path)
        for line in diffs:
            sys.stdout.write(line)
        return

    try:
        # Bug fix: create directories introduced by path substitution
        # *before* writing — the original only called makedirs after the
        # write, which fails when the target directory does not exist yet.
        new_dir = os.path.dirname(new_path)
        if new_dir and not os.path.exists(new_dir):
            os.makedirs(new_dir)
        with io.open(new_path, 'w', encoding='utf-8') as out_file:
            out_file.writelines(out_lines)
    except (IOError, OSError) as e:
        logging.warning('could not write file {0}, error message: {1}'
                        .format(path, e))
        # Bug fix: bail out here — the original fell through and could
        # delete the source file even though the destination write failed.
        return

    if new_path != path:
        try:
            shutil.copymode(path, new_path)
            os.unlink(path)
        except OSError as e:
            logging.warning('could not delete file {0}, error message: {1}'
                            .format(path, e))
            return
示例#32
0
def get_cached_pdf(headers, item, decoded):
    """
    Return a base64-encoded PDF preview of *item*, with per-user caching.

    Serves a previously converted file when the cache entry is fresh;
    otherwise downloads the file, converts it to PDF if necessary, and
    records the entry in the module-level `cached_pdf` registry keyed by
    username.

    :param headers: request headers forwarded to SendRequest.
    :param item: dict describing the file; reads 'path' and 'rootName'.
    :param decoded: decoded auth token; reads 'username'.
    :return: base64 bytes of the PDF, a status string when no preview is
        available, or {'isNotReadable': 1} for unreadable binary content.
    """
    file_path = item['path']
    # Flatten the remote path into a single cache file name.
    file_name = file_path.replace('/', '__') \
        + pathlib.Path(item['rootName']).suffix
    file_name_convert = file_name + '.pdf'
    cached_pdf[decoded['username']] = cached_pdf.get(decoded['username']) or []
    cached_item = next(
        (item for item in cached_pdf[decoded['username']] if item['path'] == file_path), None)
    upload_folder_path = ROOT_DIR+'/uploads/' + decoded['username']
    if not os.path.exists(upload_folder_path):
        os.makedirs(upload_folder_path)
    path_file_converted = upload_folder_path + '/' + file_name_convert
    path_file_download = upload_folder_path + '/' + file_name
    # Fresh cache hit: refresh the timestamp and serve the stored file.
    if cached_item and (datetime.now() - cached_item['ts']).total_seconds() <= CACHE_LIFE_TIME:
        print((datetime.now() - cached_item['ts']).total_seconds())
        cached_item['ts'] = datetime.now()
        try:
            PyPDF2.PdfFileReader(open(path_file_download, "rb"))
        except PyPDF2.utils.PdfReadError:
            # Download is not itself a PDF — serve the converted copy.
            return base64.b64encode(open(path_file_converted, "rb").read())
        else:
            return base64.b64encode(open(path_file_download, "rb").read())
    cached_item = {}
    # Cache miss (or stale entry): fetch a download URL and pull the file.
    response = SendRequest(headers, file_path)
    url = response.json()['url']
    filedata = requests.get(url)
    if filedata.status_code == 404:
        return "NO FILE PREVIEW"
    if filedata.status_code == 200:
        with open(path_file_download, 'wb') as f:
            f.write(filedata.content)
    # NOTE(review): the name is lower-cased first, so the upper-case
    # suffixes in this tuple can never match.
    if is_binary(path_file_download) and not item['rootName'].lower().endswith(
            ('jpg', 'JPG', 'png', 'PNG', 'jpeg', 'JPEG', 'gif', 'GIF',
             'bmp', 'BMP', 'svg', 'SVG', 'pdf', 'las', 'asc', 'LAS', 'TXT',
             'ASC', 'csv', 'CSV', 'xlsx', 'XLSX', 'XLS', 'xls', 'ppt', 'PPT',
             'pptx', 'PPTX', 'doc', 'DOC', 'docx', 'DOCX',
             'mpp')):
        return {'isNotReadable': 1}
    try:
        PyPDF2.PdfFileReader(open(path_file_download, "rb"))
    except PyPDF2.utils.PdfReadError:
        # Not a PDF yet: convert it (Excel-specific path left disabled).
        # if path_file_download.lower().endswith(('xlsx', 'xls', 'csv')):
        # ConvertFileExcel(path_file_download)
        # else:
        ConvertFile(path_file_download)
    else:
        path_file_converted = path_file_download
    cached_item['path'] = file_path
    cached_item['ts'] = datetime.now()
    cached_pdf[decoded['username']].append(cached_item)
    try:
        # NOTE(review): this handle is opened only to probe readability and
        # is never closed or read — the file is re-opened below.
        readFile = open(path_file_converted, "rb")
    except IOError:
        return "NO PDF FILE TO PREVIEW"
    else:
        return base64.b64encode(open(path_file_converted, "rb").read())
示例#33
0
def _verify_paths_supported(paths, parser):
    """Abort via *parser* if a non-binary path has an unsupported suffix.

    :param paths: iterable of pathlib.Path objects to check.
    :param parser: argparse parser used to report the error (exits).
    """
    for path in paths:
        try:
            COMMENT_STYLE_MAP[path.suffix]
        except KeyError:
            # TODO: This check is duplicated.
            if not is_binary(str(path)):
                # Bug fix: format *after* the translation lookup — the
                # original formatted first, which defeats gettext since the
                # catalog is keyed on the unformatted message (this matches
                # the sibling _verify_paths_comment_style).
                parser.error(
                    _("'{}' does not have a recognised file extension, "
                      "please use --style").format(path))
示例#34
0
def check_password_replaced(paths):
    """Assert no text file in *paths* still contains the password marker."""
    placeholder = re.compile('POSTGRES_PASSWORD!!!')

    for path in paths:
        # Binary files are exempt from the check.
        if is_binary(path):
            continue
        for line in open(path, 'r'):
            assert placeholder.search(line) is None, \
                'password variable not replaced in {}'.format(path)
示例#35
0
def check_paths(paths):
    """Method to check all paths have correct substitutions."""
    # No text file may still contain an unreplaced cookiecutter variable.
    text_paths = (p for p in paths if not is_binary(p))
    for path in text_paths:
        for line in open(path, "r"):
            assert RE_OBJ.search(line) is None, \
                f"cookiecutter variable not replaced in {path}"
def main():
    """Append a trailing newline to every non-binary file under the cwd.

    Walks the current working directory recursively; any text file whose
    last byte is not a newline gets one appended in place.
    """
    root_path = os.getcwd()
    for dirpath, dirnames, filenames in os.walk(root_path):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            if is_binary(path):
                continue
            # Bug fix: the file is opened in binary mode, so compare and
            # write bytes — the original used the str '\n', which never
            # equals a bytes read and cannot be written on Python 3. Also
            # close the handle (the original leaked it) and tolerate empty
            # files, for which seek(-1, SEEK_END) raises.
            with open(path, 'a+b') as fd:
                try:
                    fd.seek(-1, os.SEEK_END)
                except OSError:
                    # Empty file: nothing to terminate.
                    continue
                if fd.read(1) != b'\n':
                    fd.seek(0, os.SEEK_END)
                    fd.write(b'\n')
示例#37
0
def generate_html(templates_dir, output_dir, context=None,
                  unexpanded_templates=()):
    """
    Render every HTML template found under *templates_dir* and write the
    results to *output_dir*.

    :param templates_dir: The Complexity templates directory,
        e.g. `project/templates/`.
    :paramtype templates_dir: directory
    :param output_dir: The Complexity output directory, e.g. `www/`.
    :paramtype output_dir: directory
    :param context: Jinja2 context that holds template variables. See
        http://jinja.pocoo.org/docs/api/#the-context
    :param unexpanded_templates: template paths to copy without expansion.
    """
    logging.debug('Templates dir is {0}'.format(templates_dir))
    if not os.path.exists(templates_dir):
        raise MissingTemplateDirException(
            'Your project is missing a templates/ directory containing your \
            HTML templates.'
        )

    context = context or {}
    env = Environment()
    env.loader = FileSystemLoader(templates_dir)

    # Create the output dir if it doesn't already exist
    make_sure_path_exists(output_dir)

    for root, _dirs, files in os.walk(templates_dir):
        for name in files:
            template_filepath = os.path.relpath(
                os.path.join(root, name), templates_dir)

            # Some templates are copied verbatim instead of being expanded.
            force_unexpanded = template_filepath in unexpanded_templates
            logging.debug('Is {0} in {1}? {2}'.format(
                template_filepath,
                unexpanded_templates,
                force_unexpanded
            ))

            if is_binary(os.path.join(templates_dir, template_filepath)):
                print('Non-text file found: {0}. Skipping.'.
                      format(template_filepath))
                continue

            outfile = get_output_filename(template_filepath, output_dir,
                                          force_unexpanded)
            print('Copying {0} to {1}'.format(template_filepath, outfile))
            generate_html_file(template_filepath, output_dir, env, context,
                               force_unexpanded)
示例#38
0
文件: files.py 项目: hevi9/hevi-lib
 def mime_type(self):
     """ Determine mime type of the file content.

     The value is computed lazily and cached on the instance in
     `self._mime_type`.
     """
     if self._mime_type is None:
         # Renamed from `type` to avoid shadowing the builtin.
         guessed = guess_type(self.abspath, False)[0]
         if guessed is None:
             # No extension-based guess: fall back to content sniffing.
             if is_binary(self.abspath):
                 guessed = DEFAULT_MIME_TYPE_BIN
             else:
                 guessed = DEFAULT_MIME_TYPE_TEXT
         self._mime_type = guessed
     return self._mime_type
def check_paths(paths):
    """Assert that no file in *paths* still contains an unreplaced
    cookiecutter variable; shared helper used by other test cases.
    """
    for path in paths:
        # Binary files are exempt from the check.
        if is_binary(path):
            continue
        for line in open(path, 'r'):
            assert RE_OBJ.search(line) is None, \
                "cookiecutter variable not replaced in {}".format(path)
def ensure_newlines():
    """Append a trailing newline to every non-binary file under the cwd."""
    root_path = os.getcwd()
    for dirpath, _, filenames in os.walk(root_path):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            if is_binary(path):
                continue
            # Bug fix: the file is opened in binary mode, so compare and
            # write bytes — the original used the str '\n', which never
            # equals a bytes read and cannot be written on Python 3. Also
            # use `with` so the handle is closed (the original leaked it).
            with open(path, 'a+b') as fd:
                try:
                    fd.seek(-1, os.SEEK_END)
                    if fd.read(1) != b'\n':
                        fd.seek(0, os.SEEK_END)
                        fd.write(b'\n')
                except IOError:
                    # This was an empty file, so do nothing
                    pass
示例#41
0
def generate_file(project_dir, infile, context, env):
    """
    Render one template file into the generated project.

    The directory part of *infile* is rendered to locate the output
    directory. Binary inputs are copied verbatim; text inputs are rendered
    through *env* and written to an output path that is itself rendered
    once more before writing.

    :param infile: Input file to generate the file from.
    """
    logging.debug("Generating file {0}".format(infile))

    # Render the intermediary path to the output file (not including the
    # root project dir nor the filename itself)
    outdir = Template(os.path.dirname(infile)).render(**context)

    # The output keeps the original (possibly templated) filename.
    fname = os.path.basename(infile)  # input/output filename
    outfile = os.path.join(project_dir, outdir, fname)
    logging.debug("outfile is {0}".format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Binary payloads are copied untouched.
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
        return

    # Jinja2 expects '/'-separated template names, even on Windows.
    template_name = infile.replace(os.path.sep, '/')

    try:
        tmpl = env.get_template(template_name)
    except TemplateSyntaxError as exception:
        # Keep the exception untranslated: it then reports the verbose
        # location of the syntax error.
        exception.translated = False
        raise
    rendered_file = tmpl.render(**context)

    # The output filename itself may contain template variables.
    rendered_name = Template(outfile).render(**context)
    logging.debug("Writing {0}".format(rendered_name))

    with unicode_open(rendered_name, 'w') as fh:
        fh.write(rendered_file)
示例#42
0
    def get_file_encoding(path):
        """Return 'binary' for binary files, otherwise the first candidate
        encoding that can decode the whole file (implicitly None if none
        can).
        """
        if is_binary(path):
            return 'binary'

        # Candidate encodings come from the template config, with a
        # default pair when none are configured.
        try:
            candidates = template['encodings']
        except KeyError:
            candidates = ['utf-8', 'cp1252']

        for candidate in candidates:
            try:
                with open(path, encoding=candidate) as handle:
                    handle.read()
                return candidate
            except UnicodeDecodeError:
                continue
def do(process_parameters, settings, filename):
    """
    Email *filename* as an attachment, retrying until it succeeds.

    Builds a MIME message, attaches the file (base64-encoding binary
    content) and sends it over SMTP with STARTTLS. Any exception triggers
    a retry; the 11th consecutive failure re-raises.

    :param process_parameters: dict with 'email_to' and
        'email_subject_line' (supports '%datetime%' and '%filename%'
        placeholders).
    :param settings: dict with 'email_address', 'email_password',
        'email_smtp_server' and 'smtp_port'.
    :param filename: path of the file to attach.
    """
    file_pass = False
    counter = 0
    # Retry loop: every failure below increments the counter and retries.
    while not file_pass:
        try:
            from_address = settings['email_address']
            to_address = process_parameters['email_to']
            to_address_list = to_address.split(", ")
            msg = MIMEMultipart()

            filename_no_path = os.path.basename(filename)

            # Custom subject line with placeholder substitution, else a
            # default "<file> Attached" subject.
            if process_parameters['email_subject_line'] != "":
                date_time = str(time.ctime())
                subject_line_constructor = process_parameters['email_subject_line']
                msg['Subject'] = subject_line_constructor.replace("%datetime%", date_time).replace("%filename%",
                                                                                                   filename_no_path)
            else:
                msg['Subject'] = str(filename_no_path) + " Attached"

            msg['From'] = from_address
            msg['To'] = to_address

            body = str(filename_no_path) + " Attached"

            msg.attach(MIMEText(body, 'plain'))

            with open(filename, 'rb') as attachment:

                part = MIMEBase('application', 'octet-stream; name="%s"' % filename_no_path)
                part.set_payload(attachment.read())
                # Only binary payloads are base64-encoded; text payloads
                # are attached as-is.
                if is_binary(filename):
                    encoders.encode_base64(part)
                part.add_header('X-Attachment-Id', '1')
                part.add_header('Content-Disposition', 'attachment; filename="%s"' % filename_no_path)

                msg.attach(part)
                server = smtplib.SMTP(str(settings['email_smtp_server']), str(settings['smtp_port']))
                server.starttls()
                server.login(from_address, settings['email_password'])
                server.sendmail(from_address, to_address_list, msg.as_string())
                server.close()
                file_pass = True
        except Exception:
            # Give up after 10 failed attempts by re-raising the error.
            if counter == 10:
                raise
            counter += 1
示例#44
0
	def get_context_data(self, **kwargs):
		"""Add a preview of the sample's FASTA file to the template context.

		Binary uploads are flagged with a message instead of previewed;
		for text files at most the first six lines are shown.
		"""
		context = super(SampleDetail, self).get_context_data(**kwargs)
		# Renamed from `object` to avoid shadowing the builtin.
		sample = kwargs['object']
		file_ = sample.fasta_file.file.name
		if is_binary(file_):
			file_head = 'The uploaded file is a binary file !!'
		else:
			file_head = ''
			with open(file_) as f:
				# Collect at most the first six lines as a preview.
				index = 0
				line = f.readline()
				while line and index < 6:
					file_head += line
					line = f.readline()
					index += 1
		context['file_head'] = file_head
		return context
示例#45
0
def read_directory(project_directory):
    """Walk *project_directory* and describe every directory and file.

    Returns a list of {'path', 'files'} dicts; each file entry records its
    path relative to the project's parent directory, whether it is binary,
    and its full content. Hidden files, the `.git` tree and `.gitignore`d
    files are skipped.
    """
    entries = []

    parent_dir = os.path.normpath(
        os.path.join(project_directory, os.pardir))

    # Honour the project's .gitignore, if present.
    ignore_list = []
    gitignore = os.path.join(project_directory, '.gitignore')
    if os.path.exists(gitignore):
        ignore_list = read_ignore(read_file(gitignore))

    for root, dirs, files in os.walk(project_directory):
        rel_root = os.path.relpath(root, parent_dir)

        # Never descend into version-control internals.
        if '.git' in rel_root:
            dirs[:] = []
            continue

        entry = {
            'path': rel_root,
            'files': [],
        }

        if ignore_list:
            files = remove_ignores(files, ignore_list)

        for name in files:
            if name.startswith('.'):
                continue

            full_path = os.path.join(root, name)
            content = read_file(full_path)

            entry['files'].append({
                'path': os.path.relpath(full_path, parent_dir),
                'binary': check.is_binary(full_path),
                'content': content,
            })

        entries.append(entry)

    return entries
示例#46
0
    def check_paths(self, paths):
        """
        Assert that no file in *paths* still contains an unreplaced
        cookiecutter template variable; binary files are skipped.

        Shared helper used by other test cases.

        :param paths: iterable of file paths to scan.
        """
        # Construct the cookiecutter search pattern.
        # Bug fix: raw string — '\s' in a plain string literal is an
        # invalid escape sequence (DeprecationWarning, an error in future
        # Python versions).
        pattern = r"{{(\s?cookiecutter)[.](.*?)}}"
        re_obj = re.compile(pattern)

        # Assert that no match is found in any of the files
        for path in paths:
            if not is_binary(path):
                for line in open(path, 'r'):
                    match = re_obj.search(line)
                    self.assertIsNone(
                        match,
                        "cookiecutter variable not replaced in {}".format(path))
示例#47
0
        def worker(re_text, file_queue, result_queue, logger, timeout):
            """
            Queue worker: scan files for a regex match until *timeout*.

            Pulls paths from *file_queue*, skips binary files and a few
            mime types, and puts each path whose content matches *re_text*
            onto *result_queue*.

            :param re_text: compiled regex matched against each line.
            :param file_queue: queue of candidate file paths.
            :param result_queue: queue receiving matching paths.
            :param logger: logger for per-file errors.
            :param timeout: unix timestamp after which the worker stops.
            """
            while int(time.time()) < timeout:
                if file_queue.empty() is not True:
                    f_path = file_queue.get()
                    try:
                        if not is_binary(f_path):
                            mime = mimetypes.guess_type(f_path)[0]

                            # exclude certain mime types from the search
                            if mime not in ["application/pdf", "application/rar"]:
                                with open(f_path, "rb") as fp:
                                    for line in fp:
                                        try:
                                            line = as_unicode(line)
                                        except UnicodeDecodeError:
                                            # Fall back to charset detection.
                                            # MacCyrillic is treated as a
                                            # misdetection of windows-1251 —
                                            # presumably from experience;
                                            # confirm.
                                            charset = chardet.detect(line)
                                            if charset.get("encoding") in ["MacCyrillic"]:
                                                detected = "windows-1251"
                                            else:
                                                detected = charset.get("encoding")

                                            if detected is None:
                                                break
                                            try:
                                                line = str(line, detected, "replace")
                                            except LookupError:
                                                pass

                                        # First matching line settles the file.
                                        if re_text.match(line) is not None:
                                            result_queue.put(f_path)
                                            # logger.debug("matched file = %s " % f_path)
                                            break

                    except UnicodeDecodeError as unicode_e:
                        logger.error("UnicodeDecodeError %s, %s" % (str(unicode_e), traceback.format_exc()))

                    except IOError as io_e:
                        logger.error("IOError %s, %s" % (str(io_e), traceback.format_exc()))

                    except Exception as other_e:
                        logger.error("Exception %s, %s" % (str(other_e), traceback.format_exc()))
                    finally:
                        # Always acknowledge the queue item, even on error.
                        file_queue.task_done()
                else:
                    time.sleep(REQUEST_DELAY)
示例#48
0
文件: dys.py 项目: jankoslavic/DyS
    def openFile(self):
        """
        Open a simulation file chosen via a Qt file dialog.

        Binary files are unpickled with dill into `self.MBD_system`; text
        files are currently ignored.

        NOTE(review): this block uses Python 2 syntax (`print` statement,
        QString) and is not Python 3 compatible.
        :return:
        """
        file_dialog = QtGui.QFileDialog()

        filename, file_type = file_dialog.getOpenFileNameAndFilter(self,
                                                                   caption='Open file',
                                                                   directory=QtCore.QString(self.MBD_folder_abs_path),
                                                                   filter=self._file_types)

        # The dialog returns a QString; convert before use. An empty
        # string means the user cancelled.
        filename = str(filename)
        if filename:
            if is_binary(filename):
                with open(filename, 'rb') as _file:
                    print "_file =", _file
                    self.MBD_system = dill.load(_file)
            else:
                # Text files: intentionally ignored? TODO confirm intent.
                pass
示例#49
0
def generate_file(infile, context, env):
    """
    Render one template file in place.

    The absolute directory of *infile* is rendered to locate the output
    directory. Binary inputs are copied verbatim; text inputs are rendered
    through *env*, and the output filename itself is rendered once more
    before writing.

    :param infile: Input file to generate the file from.
    """
    logging.debug("Generating file {0}".format(infile))

    # Render the path to the output file (but don't include the filename)
    abs_infile = os.path.abspath(infile)
    outdir = Template(os.path.dirname(abs_infile)).render(**context)
    fname = os.path.basename(abs_infile)  # input/output filename
    # Write it to the corresponding place in output_dir
    outfile = os.path.join(outdir, fname)
    logging.debug("outfile is {0}".format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Binary payloads are copied untouched.
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
        return

    # Jinja2 expects '/'-separated template names, even on Windows.
    tmpl = env.get_template(infile.replace(os.path.sep, '/'))
    rendered_file = tmpl.render(**context)

    # The output filename itself may contain template variables.
    rendered_name = Template(outfile).render(**context)
    logging.debug("Writing {0}".format(rendered_name))

    with unicode_open(rendered_name, 'w') as fh:
        fh.write(rendered_file)
示例#50
0
文件: BioID.py 项目: Amplytica/BioID
	def identify(self, input_data):
		"""Classify input files or raw text.

		:param input_data: either a list of file paths or a raw text
			string read from stdin.
		:return: dict mapping each file path (or "unknown_text") to its
			identification result. Exits with status 1 on any other input.
		"""
		identified = {}

		# Bug fix: use isinstance() rather than comparing type() objects.
		if isinstance(input_data, list):
			# identify() was passed a list of filenames
			for file_path in input_data:
				if is_binary(file_path):
					identified[file_path] = self.identify_binary(file_path)
				else:
					# Read text with universal newlines ("rU"); the
					# original leaked this file handle.
					# NOTE(review): "rU" mode is removed in Python 3.11 —
					# plain "r" is equivalent there; confirm target version.
					with open(file_path, "rU") as text_input_file:
						input_text = text_input_file.read()
					identified[file_path] = self.identify_text(input_text)
		elif isinstance(input_data, str):
			# identify() was passed a string from stdin
			identified["unknown_text"] = self.identify_text(input_data)
		else:
			# identify() was (probably) passed some useless garbage
			print("Error: identify() received unrecognized input data: %s" % str(input_data))
			sys.exit(1)

		return identified
示例#51
0
文件: model.py 项目: zabano/geopd
    def __init__(self, filename):
        """
        Record metadata (absolute path, size, mtime, md5) for *filename*,
        optionally keeping its content in memory.

        :param filename: path of the file to describe.
        :raises Exception: if content saving is enabled and the file
            exceeds the class's MAX_SIZE.
        """

        binary = is_binary(filename)
        encoding = guess_type(filename)[1]
        # Content is retained only when the class allows a positive size.
        save_content = self.__class__.MAX_SIZE > 0

        self.filename = os.path.abspath(filename)
        st = os.stat(filename)
        self.mtime = int(st.st_mtime)
        self.size = st.st_size

        if save_content:
            if st.st_size > self.__class__.MAX_SIZE:
                raise Exception("file: '{0}' is too large".format(filename))
            self._content = ''

        # Read in filesystem-fragment-sized chunks while hashing.
        chunk_size = os.statvfs(filename).f_frsize
        with self._open(filename, binary, encoding) as fp:
            md5 = hashlib.md5()
            # NOTE(review): the b'' sentinel and the str accumulator
            # (`self._content = ''`) assume particular str/bytes behaviour
            # from whatever _open yields — on Python 3, '' += bytes would
            # raise for a binary open; confirm _open's contract.
            for chunk in iter(partial(fp.read, chunk_size), b''):
                md5.update(chunk)
                if save_content:
                    self._content += chunk
            self.md5sum = md5.hexdigest()
示例#52
0
 def is_binary_comparison(self):
     """Return whether either side of the comparison is a binary file.

     Short-circuits exactly like the original boolean expression: the
     right file is only inspected when the left one is absent or
     non-binary.
     """
     if self.left_file and is_binary(self.left_file):
         return True
     return self.right_file and is_binary(self.right_file)
示例#53
0
 def test_text_js(self):
     # A JavaScript source file must be classified as text, not binary.
     self.assertFalse(is_binary('tests/isBinaryFile/index.js'))
示例#54
0
 def test_binary_gif2(self):
     # NOTE(review): despite the test name, this asserts *not* binary —
     # presumably null_file.gif is empty/all-NUL; confirm the fixture.
     self.assertFalse(is_binary('tests/isBinaryFile/null_file.gif'))
示例#55
0
 def test_binary_gif3(self):
     # A real GIF payload must be detected as binary.
     self.assertTrue(is_binary('tests/isBinaryFile/trunks.gif'))
示例#56
0
 def might_be_binary(self):
     """Try to quickly guess if the file is binary."""
     # Imported lazily, presumably to keep binaryornot optional.
     from binaryornot.check import is_binary as _is_binary
     return _is_binary(self.path)
示例#57
0
文件: files.py 项目: hevi9/hevi-lib
 def is_binary(self):
     """Deprecated accessor: always raises DeprecationWarning."""
     # Bug fix: removed the unreachable `return` that followed the raise.
     raise DeprecationWarning()
示例#58
0
文件: files.py 项目: hevi9/hevi-lib
 def is_text(self):
     """ Is file content text ? """
     # Delegates to binaryornot's content sniffing on the absolute path.
     return not is_binary(self.abspath)
示例#59
0
    def run(self):
        """Download the target file over WebDAV, detect its text encoding
        and hand the result to on_success(); any exception is reported
        through on_error() with a traceback.

        NOTE(review): when download_result["success"] is falsy the method
        returns without invoking either callback — confirm callers
        tolerate that silent no-op.
        """
        try:
            self.preload()

            self.logger.debug("FM WebDav ReadFile worker run(), path = %s" % self.path)

            webdav_path = self.webdav.path(self.path)

            # Each request downloads into its own random temp directory.
            hash_str = self.random_hash()
            download_path = TMP_DIR + '/' + self.login + '/' + hash_str + '/'

            download_result = self.download_file_from_webdav(webdav_path, download_path)

            if download_result["success"]:
                # Derive the bare file name by stripping the parent-dir
                # prefix (once) from the webdav path.
                filedir = self.webdav.parent(self.path)
                filename = self.path
                if filedir != '/':
                    filename = filename.replace(filedir, "", 1)
                read_path = (download_path + '/' + filename)
                if not os.path.exists(read_path):
                    raise OSError("File not downloaded")

                # Only text files may be read by this worker.
                if is_binary(read_path):
                    raise OSError("File has binary content")

                with open(read_path, 'rb') as fd:
                    content = fd.read()

                # part of file content for charset detection — the first and
                # last charset_detect_buffer bytes are enough for chardet
                part_content = content[0:self.charset_detect_buffer] + content[-self.charset_detect_buffer:]
                chardet_result = chardet.detect(part_content)
                detected = chardet_result["encoding"]
                confidence = chardet_result["confidence"]

                self.logger.debug("Detected encoding = %s (%s), %s" % (detected, confidence, read_path))

                # Workaround until a proper libmagick >= 5.10 build is
                # available:
                # https://github.com/ahupp/python-magic/issues/47
                #
                # Mozilla's uchardet could also be built instead (it has its
                # own quirks too); we already use its Python port, chardet,
                # and their site is dead by now :(
                #
                # NOTE(review): the file is opened in 'rb', so part_content
                # is bytes while re_utf8 is a str pattern — searching bytes
                # with it raises TypeError on Python 3; presumably this runs
                # on Python 2. Confirm.
                re_utf8 = re.compile('.*charset\s*=\s*utf\-8.*', re.UNICODE | re.IGNORECASE | re.MULTILINE)
                html_ext = ['htm', 'html', 'phtml', 'php', 'inc', 'tpl', 'xml']
                file_ext = os.path.splitext(read_path)[1][1:].strip().lower()

                # High-confidence guess: remap encodings chardet is known to
                # report for content this application treats as
                # windows-1251 / utf-8.
                if confidence > 0.75 and detected != 'windows-1251' and detected != FM.DEFAULT_ENCODING:
                    if detected == "ISO-8859-7":
                        detected = "windows-1251"

                    if detected == "ISO-8859-2":
                        detected = "utf-8"

                    if detected == "ascii":
                        detected = "utf-8"

                    if detected == "MacCyrillic":
                        detected = "windows-1251"

                    # if the guess is still wrong — fall back to the charset
                    # declared inside HTML-like files
                    if detected != FM.DEFAULT_ENCODING and file_ext in html_ext:
                        result_of_search = re_utf8.search(part_content)
                        self.logger.debug(result_of_search)
                        if result_of_search is not None:
                            self.logger.debug("matched utf-8 charset")
                            detected = FM.DEFAULT_ENCODING
                        else:
                            self.logger.debug("not matched utf-8 charset")

                # Medium-confidence guess: a narrower remap table.
                elif confidence > 0.60 and detected != 'windows-1251' and detected != FM.DEFAULT_ENCODING:
                    if detected == "ISO-8859-2":
                        detected = "windows-1251"

                    if detected == "MacCyrillic":
                        detected = "windows-1251"

                    # if the guess is still wrong — fall back to the charset
                    # declared inside HTML-like files
                    if detected != FM.DEFAULT_ENCODING and file_ext in html_ext:
                        result_of_search = re_utf8.search(part_content)
                        self.logger.debug(result_of_search)
                        if result_of_search is not None:
                            self.logger.debug("matched utf-8 charset")
                            detected = FM.DEFAULT_ENCODING
                        else:
                            self.logger.debug("not matched utf-8 charset")

                elif detected == 'windows-1251' or detected == FM.DEFAULT_ENCODING:
                    pass
                else:
                    # Low confidence — fall back to the application default.
                    detected = FM.DEFAULT_ENCODING

                # Only encodings known to FM are allowed through.
                encoding = detected if (detected or "").lower() in FM.encodings else FM.DEFAULT_ENCODING
                self.logger.debug("Result encoding = %s, %s" % (encoding, read_path))

                answer = {
                    "item": self._make_file_info(read_path),
                    "content": content,
                    "encoding": encoding
                }

                result = {
                        "data": answer,
                        "error": False,
                        "message": None,
                        "traceback": None
                    }

                self.on_success(result)

        except Exception as e:
            # Any failure is reported to the caller with its traceback.
            result = {
                "error": True,
                "message": str(e),
                "traceback": traceback.format_exc()
            }

            self.on_error(result)
示例#60
0
文件: typo.py 项目: willseward/brain
def main():
    """Scrape GitHub code search for known typos, fork each matching
    repository, replace the typo in its files and open a pull request.

    Reads the typo list from words/words.txt (one line per entry,
    "correct - wrong1,wrong2,...") and GitHub credentials from
    credentials.txt. Python 2 code (print statements).
    """
    # Load and parse the typo dictionary into (wrong, correct) pairs.
    words_file = open('words/words.txt', 'r')
    words = words_file.read().split('\n')
    words_file.close()

    word_results = []

    for line in words:
        try:
            if line == "":
                continue

            # "correct - wrong1,wrong2" -> correct + list of wrong forms.
            split_line = line.split(' - ', 1)

            correct = split_line[0].strip();
            wrong_words = split_line[1].split(',')

            for wrong in wrong_words:
                word_results.append((wrong.strip(), correct))
        except:
            # NOTE(review): bare except hides unrelated errors — narrowing
            # to IndexError (missing " - " separator) would be safer.
            print "Failed to parse line: %s" % line
            pass

    for wrong_word, correct_word in word_results:
        print "Scraping code search for word: %s" % wrong_word

        scraped_info = scrape_code_search(wrong_word)

        # Collect unique anchor tags from the search results page.
        scraped_info_text = BeautifulSoup(scraped_info.text, "lxml")
        scraped_links = list(set(scraped_info_text.find_all('a')))

        repos_file = open('repos/repos.txt', 'w')

        for scraped_link in scraped_links:
            # "owner/repo" links contain a slash in both href and text.
            if "/" in scraped_link['href']:
                if "/" in scraped_link.get_text():
                    repos_file.write("%s\n" % scraped_link.get_text())
                    print "Added %s to file" % scraped_link.get_text()

        # Deduplicate via the shell, then rewrite the file.
        # NOTE(review): truncate() is called at the current (end-of-file)
        # position, so nothing is removed and the sorted list is appended
        # after the original entries — a seek(0) before truncate()/write()
        # was probably intended. Confirm.
        sorted_repos_file = os.popen("sort repos/repos.txt | uniq").read()
        repos_file.truncate()
        repos_file.write(sorted_repos_file)

        repos_file.close()

        print "Successfully scraped code search for word: %s" % wrong_word

        with open('repos/repos.txt', 'r') as repos_file_read:
            repos = repos_file_read.read().split('\n')
            for repo in repos:
                project_forked = False
                # Pull-request description shown to the repository owner.
                body = """
    Hi! I'm a bot that checks GitHub for spelling mistakes, and I found one in your repository. When it
    should be '%s', you typed '%s'. I created this pull request to fix it!

    If you think there is anything wrong with this pull request or just have a question, be kind to mail me 
    at [email protected] (professional email, huh?). I’ll try to address the problem as soon as
    I’m aware of it.

    If you decide to close this pull request, please specify why before doing so.

    With kind regards,
    TheTypoMaster
                """ % (correct_word, wrong_word)

                # "owner/repo" -> "repo"
                repo_name = repo.split('/', 1)[1]

                credentials_file = open('credentials.txt', 'r')
                username, password = credentials_file.read().split('\n')
                credentials_file.close()

                create_fork(repo, username, password)
                print "Created fork: %s" % repo

                # Poll until GitHub has finished populating the fork.
                while True:
                    forked_project = requests.get("https://github.com/TheTypoMaster/%s" % repo_name)
                    if 'This repository is empty' in forked_project.text:
                        print "Large project; sleeping a little bit!"
                        time.sleep(1)
                    else:
                        break;

                # NOTE(review): repo_name is interpolated into shell
                # commands unquoted — a hostile repository name could
                # inject shell code. subprocess with an argument list
                # would be safer.
                os.system("git clone https://github.com/TheTypoMaster/%s.git" % repo_name)
                print "Successfully cloned directory: %s" % repo_name

                # Replace the typo in every non-binary file of the clone.
                for dirpath, dirnames, filenames in os.walk(repo_name):
                    for name in filenames:
                        path = os.path.join(dirpath, name)
                        mimetype_name = mimetypes.guess_type(path)[0]

                        try:
                            if is_binary(path):
                                print "File '%s' ignored, not a text file (%s)" % (path, mimetype_name)
                            else:
                                with open(path, 'r+w') as filepath:
                                    filecontent = filepath.read()
                                    
                                    if wrong_word in filecontent:
                                        print "Found '%s' in path: %s" % (wrong_word, path)

                                        filecontent = filecontent.replace(wrong_word, correct_word)

                                        # Rewrite the file in place.
                                        filepath.truncate(0)
                                        filepath.seek(0)
                                        filepath.write(filecontent)
                                    else:
                                        print "Could not find '%s' in path: %s" % (wrong_word, path)
                        except IOError:
                            print "Could not find file: '%s'" % path 
                    # Prune git metadata from the remaining walk.
                    if '.git' in dirnames:
                        dirnames.remove('.git')

                os.chdir(repo_name)

                # Commit and push the fix to the fork.
                os.system("git add .")
                os.system("git commit -m \"Fix typo '%s' \"" % wrong_word)
                os.system("git push -u https://github.com/TheTypoMaster/%s.git master" % repo_name)

                os.chdir('../')

                print "Pushed changes"

                create_pull_request(repo, "Fix typo '%s'" % wrong_word, body, username, password)

                print "Created pull request for project '%s'" % repo_name

                cleanup(repo_name)

                print "Deleted project and emptied trash"

                # Drop the processed repo (first line) from the queue file.
                with open('repos/repos.txt', 'r') as fin:
                    data = fin.read().splitlines(True)

                with open('repos/repos.txt', 'w') as fout:
                    fout.writelines(data[1:])

                print "Removed first line from file"

                print "Finished!"

                time.sleep(1)