def readFile(path):
    """Load a single file and parse it according to its extension.

    :param path: path to the file which the user wants to load (str)
    :return: depends on filetype:
        .odt           -> (odt_data, stages)
        .omf or .ovf   -> (rawVectorData, header)
    :raises ValueError: if the file has an unsupported extension
    """
    # Bug fix: match on the actual extension instead of a substring of
    # the whole path, so e.g. a directory named "run.odt_out" cannot
    # mis-route an .omf file inside it.
    ext = os.path.splitext(path)[1].lower()
    if ext == ".odt":
        odt_data, stages = getPlotData(path)
        return odt_data, stages
    elif ext in (".omf", ".ovf"):
        if is_binary(path):
            headers, rawVectorData = MultiprocessingParse.readBinary([path])
            header = headers[0]
        else:
            rawVectorData = MultiprocessingParse.readText([path])
            header = getFileHeader(path)
        return rawVectorData, header
    else:
        raise ValueError("Invalid file! Must have .odt, .omf "
                         "or .ovf extension!")
def dirCheck(DIRPATH):
    """Recursively run the language-tool check over every text file
    under *DIRPATH*, writing one ``.fix`` report per file into a tree
    mirrored below ``FIXDIR``.

    Binary files are skipped; subdirectories are descended into.

    :param DIRPATH: directory to scan (expected to end with '/')
    """
    # Bug fix: the original used Python 2 `print` statements (a syntax
    # error under Python 3, which the rest of this file targets) and
    # never closed its file handles.
    for inFile in os.listdir(DIRPATH):
        full = DIRPATH + inFile
        if os.path.isdir(full):
            print(full + '/')
            dirCheck(full + '/')
        elif is_binary(full):
            continue
        else:
            # Mirror the source tree below FIXDIR for the report file.
            TEMPDIR = FIXDIR[:-1] + DIRPATH
            mkdirp(TEMPDIR)
            nLine = 0
            with open(full, 'r') as f, \
                    open(TEMPDIR + inFile + '.fix', 'w') as fw:
                for line in f:
                    matches = tool.check(line)
                    for match in matches:
                        # Rules in ExceptRuleId are deliberately ignored.
                        if match.ruleId in ExceptRuleId:
                            continue
                        print(full, match.ruleId)
                        data = (str(nLine) + ' | ' + str(match.fromx) + ' '
                                + str(match.tox) + ' | '
                                + line[match.fromx:match.tox] + ' | '
                                + match.msg + '\n')
                        print(data)
                        fw.write(str(match) + '\n')
                        fw.write(data + '\n')
                    nLine += 1
def make_file(inpath, tmpldict, outpath=None):
    """Render a single template file to *outpath* (or echo it).

    Binary inputs are copied verbatim; text inputs are rendered with
    *tmpldict* and written out UTF-8 encoded.

    :param inpath: Path of the input template file.
    :param tmpldict: Dict of template variables used for rendering.
    :param outpath: Optional output path (file or directory); it is
        itself rendered as a template. When omitted, the result is
        echoed to the console.
    :return: True on success, False if the template failed to render,
        None on the binary-copy path (original behavior preserved).
    """
    inpath = op.abspath(inpath)
    if outpath:
        outpath = render_str(outpath, tmpldict)
        if op.isdir(outpath):
            # Target is a directory: keep the input's base name.
            outpath = op.join(outpath, op.basename(inpath))
            outpath = render_str(outpath, tmpldict)
    if is_binary(inpath):
        qprompt.status("Copying `%s`..." % (outpath), fsys.copy,
                       [inpath, outpath])
        return
    text = render_file(inpath, tmpldict)
    if text is None:  # fixed: identity check instead of `== None`
        return False
    # Handle rendered output.
    if outpath:
        outpath = op.abspath(outpath)
        if inpath == outpath:
            qprompt.fatal("Output cannot overwrite input template!")
        fsys.makedirs(op.dirname(outpath))
        with io.open(outpath, "w", encoding="utf-8") as f:
            qprompt.status("Writing `%s`..." % (outpath), f.write, [text])
    else:
        qprompt.echo(text)
    return True
def searchlist(flist: Union[List[Path], Iterable], txt: str,
               exclude: List[str], verbose: bool):
    """Search for *txt* in every file of *flist*, printing matches.

    :param flist: iterable of Path objects to search
    :param txt: text to look for
    :param exclude: path components; files whose resolved path contains
        any of them are skipped
    :param verbose: when True also print the matching lines
    :return: list of paths that matched (the original built this list
        but never returned it)
    """
    mat = []
    exc = set(exclude)
    for f in flist:
        # Portability fix: use Path.parts instead of splitting the
        # string on '/', so exclusion also works on Windows.
        if exc.intersection(f.resolve().parts):
            continue
        # note that searchfile() does NOT work for PDF even with text
        # inside...but Grep does. Hmm..
        if f.is_file() and f.stat().st_size < MAXSIZE:
            matchinglines: List[str] = []
            if not is_binary(str(f)):
                here, matchinglines = searchfile(f, txt)
            elif f.suffix == '.pdf':
                here = searchbinary(f, txt)
            else:
                logging.info(f'skipped {f}')
                continue
            if here:
                mat.append(f)
                if verbose:
                    print(MAGENTA + str(f))
                    print(BLACK + '\n'.join(matchinglines))
                else:
                    print(f)
    return mat
def _is_executable(self, file_name):
    """Check whether a file is executable, either directly by the OS or
    by the interpreter/VM of the program's selected language.

    Args:
        file_name (str): The name of the file to be checked

    Returns:
        bool: True if the file can be executed
    """
    ext = path.splitext(file_name)[-1]
    # Anything the OS already marks executable (or a Windows .exe).
    if os.access(file_name, os.X_OK) or ext == '.exe':
        return True
    # Extensions runnable by the selected language's interpreter/VM.
    runnable = {
        'java': ['.class', '.jar'],
        'python': ['.py'],
        'bash': ['.sh', '.bash'],
        'shell': ['.sh', '.bash'],
    }
    if ext in runnable.get(self.language, []):
        return True
    # Compiled C/C++ binaries: extension-less files with binary content.
    return (self.language in ['c', 'cpp', 'c++'] and ext == ''
            and is_binary(file_name))
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to
    a file etc) with the filename and some sort of information about the
    source.

    Note: the source parameter is currently not stored in the database.
    """
    if isinstance(filename, list):
        # Recurse over each file of a multi-file output.
        for f in filename:
            log_output(f, source)
        return
    elif not isinstance(filename, six.string_types):
        # File-like object: fall back to its .name when present.
        try:
            filename = filename.name
        except AttributeError:  # fixed: no longer a bare `except: pass`
            pass
    filename = os.path.abspath(filename)
    version = get_version(source)
    db = open_or_create_db()
    if option_set('data', 'file_diff_outputs') and os.path.isfile(filename) \
            and not is_binary(filename):
        # Snapshot the file now so a diff can be computed at script exit.
        tf = tempfile.NamedTemporaryFile(delete=False)
        shutil.copy2(filename, tf.name)
        add_file_diff_to_db(filename, tf.name, db)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))
    # Update object in DB
    # data hash will be hashed at script exit, if enabled
    db.update(append("outputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", version, no_duplicates=True), eids=[RUN_ID])
    db.close()
def process_state(self, state):
    """Attach the content of a local file to *state*.

    Only acts on file:// URLs; skips entries flagged as unchanged,
    files above the configured size limit, excluded paths and binary
    files. Read errors are recorded in state['content_error'].

    :param state: dict describing the file (at least 'url')
    :return: the (possibly updated) state dict
    """
    if str(state['url']).startswith("file://"):
        path = state['url'][7:]  # strip the "file://" scheme prefix
    else:
        return state
    # Any truthy assume_nochange flag skips the read.
    if state.get('assume_nochange'):
        return state
    if state.get('size') is not None:
        if int(state['size']) > self.size_limit:
            self.app.log.debug("%s is above size limit - skipping" % (path))
            return state
    # filter out excludes
    for pat in self.excludepats:
        if pat.match(path):
            self.app.log.debug("%s excluded by %s" % (path, pat))
            return state
    try:
        if is_binary(path):
            self.app.log.debug("%s seems to be binary - skipping" % (path))
            return state
        with open(path, mode='rb') as file:
            state['content'] = file.read().decode('utf-8', 'replace')
        #self.app.log.debug("reading conffile contents: %s" % (path))
    except Exception:
        # fixed: narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; the error is kept on the state.
        state['content_error'] = sys.exc_info()[1]
    return state
def send_to_syslog(filename, sock, zflag):
    """Wraps individual syslog functions. Returns linecount

    :param filename: path of the file to forward
    :param sock: socket handed through to the send_* helpers
    :param zflag: when True, gzip files are decompressed and sent;
        zip archives are skipped either way
    :return: number of lines sent (0 when the file was skipped)
    """
    linecount = 0
    logging.debug("Function: send_to_syslog %s, send_zip=%s", filename, zflag)
    if os.stat(filename).st_size > 0:
        if is_binary(filename):
            if zflag:
                if filename.lower().endswith(".zip"):
                    # Zip archives are never forwarded, only logged.
                    linecount = 0
                    logging.info("Skipped zip: %s",
                                 os.path.basename(filename))
                elif filename.lower().endswith(".gz"):
                    logging.info("Sending file: %s",
                                 os.path.basename(filename))
                    linecount = send_gzip_to_syslog(filename, sock)
                    logging.info("Sent file: %s: Lines: %s",
                                 os.path.basename(filename), linecount)
                else:
                    # Binary but neither .zip nor .gz: skip.
                    linecount = 0
                    logging.debug("Skipped binary file: %s", filename)
            # NOTE(review): when zflag is False a binary file is skipped
            # silently (linecount stays 0) - confirm this is intended.
        else:
            # Plain text files are always forwarded.
            logging.info("Sending file: %s", os.path.basename(filename))
            linecount = send_text_to_syslog(filename, sock)
            logging.info("Sent file: %s: Lines: %s",
                         os.path.basename(filename), linecount)
    else:
        # Empty files are skipped outright.
        linecount = 0
        logging.info("Skipped empty file: %s", os.path.basename(filename))
    return linecount
def get_file_mode(file_path: str, mode: str) -> str:
    """Map a requested load mode to the ``open()`` mode string.

    Returns 'rb' if mode = 'binary'. Returns 'r' if mode = 'text'
    Returns 'rb' or 'r' if mode = 'auto' -> Will be automatically detected.

    Example:
        >>> get_file_mode("doesn't matter", 'binary')
        'rb'
        >>> get_file_mode("doesn't matter", 'text')
        'r'
        >>> get_file_mode(__file__, 'auto')
        'r'

    Args:
        file_path: File to load.
        mode: One of ['binary', 'text', 'auto'].

    Returns:
        One of ['rb', 'r']
    """
    fixed_modes = {'binary': 'rb', 'text': 'r'}
    if mode in fixed_modes:
        return fixed_modes[mode]
    if mode == 'auto':
        # Sniff the file content to decide between binary and text.
        return 'rb' if is_binary(file_path) else 'r'
    raise ValueError(
        "Argument 'mode' is expected to be one of: auto, binary, text")
def add(self, path):
    """ Make content available for download

    Add entry to table with Status as UPLOAD
    Returns TRUE on success FALSE on failure

    :param path: file or directory path to share; directories share each
        contained file recursively
    """
    if (not os.path.exists(path)) or (os.path.isfile(path)
                                      and not is_binary(path)):
        # Bug fix: the original message had no '{}' placeholder, so the
        # offending path never appeared in the log.
        logger.warning(
            "Path: {} Does not Exist or Is not a Binary File".format(path))
        return False
    elif os.path.isdir(path):
        logger.info("Sharing all files of folder {}".format(path))
        for entry in os.listdir(path):  # renamed from `file` (builtin)
            entry = path + "/" + entry
            if os.path.isfile(entry):
                self.add(entry)
        return True
    else:
        logger.info("Request to Share file: {}".format(path))
        # Base name without extension, used as the share entry name.
        filename = os.path.splitext(path)[0].split("/")[-1]
        file_stat = os.stat(path)
        size = file_stat.st_size
        cSum = self.checksum_large(path)
        parentId = "0"
        randId = 0
        status = constants.FS_UPLOADED
        replication = None
        self.add_entry(constants.DB_TABLE_FILE, filename, path, size, cSum,
                       parentId, randId, status, replication)
        logger.info("File Share Done for {}".format(path))
        return True
def getTokensFromProject(self, projectName):
    """Walk *projectName* and collect every '#{...}' token found in its
    text files into self.tokens, remembering which files contained any
    in self.tokenized_files.
    """
    token_re = re.compile(r'#{[^}]+}')
    for folder, _subfolders, filenames in os.walk(projectName):
        for name in filenames:
            filepath = os.path.join(folder, name)
            # Binary files cannot contain template tokens; skip them.
            if is_binary(filepath):
                continue
            with open(filepath, 'r') as handle:
                contents = handle.read()
            matches = token_re.findall(contents)
            print(f'{matches} in file {filepath}')
            for token in matches:
                if token not in self.tokens:
                    self.tokens.append(token)
                if filepath not in self.tokenized_files:
                    self.tokenized_files.append(filepath)
    print(self.tokens)
def diff_two_directories(logger, dir1, dir2, tmp_file, exclude):
    """Recursively diff *dir1* against *dir2*, skipping excluded names,
    '.git' and binary files.

    :param logger: logger for progress/debug messages
    :param dir1: base directory whose entries drive the comparison
    :param dir2: directory compared against
    :param tmp_file: file that individual diffs are written to
    :param exclude: list of entry names to ignore (may be None)
    """
    paths = os.listdir(dir1)
    # Bug fix: the original `exclude += ['.git']` mutated the caller's
    # list in place and appended another '.git' on every recursion
    # level; build a fresh list instead.
    exclude = list(exclude or []) + ['.git']
    for ex in exclude:
        if ex in paths:
            logger.debug('Ignore {}'.format(ex))
            paths.remove(ex)
    for path in paths:
        path1 = os.path.join(dir1, path)
        path2 = os.path.join(dir2, path)
        if os.path.isdir(path1):
            logger.debug('Processing dir {}'.format(path1))
            diff_two_directories(logger, path1, path2, tmp_file, exclude)
        elif is_binary(path1):
            logger.debug('Ignore binary file {}'.format(path1))
            continue
        elif not os.path.exists(path2):
            logger.debug(
                'Ignore single file (no same name file in dir2) {}'.format(
                    path1))
            continue
        else:
            logger.debug('Compare {} and {}'.format(path1, path2))
            diff_two_files(path1, path2, tmp_file)
def run_linux_agents(file_agents):
    """Dispatch each (file_path, agent) pair to the matching parser.

    Plain text logs run their generic agent; the well-known binary login
    records (lastlog, faillog, tallylog, utmp/wtmp/btmp) get dedicated
    agents.
    """
    # Files whose names contain these are never treated as plain text.
    special_records = ('btmp', 'tallylog', 'wtmp', 'utmp', 'lastlog')
    for file_path, lin_agent in file_agents.items():
        if not is_file_linux(file_path):
            continue
        if not is_binary(file_path) and not any(
                marker in file_path for marker in special_records):
            # Generic textual parser handles ordinary log files.
            lin_agent.run()
        elif 'lastlog' in file_path:
            LastlogAgent(lin_agent).run()
        elif 'faillog' in file_path:
            FailLogAgent(lin_agent).run()
        elif 'tallylog' in file_path:
            TallyLogAgent(lin_agent).run()
        else:
            # utmp / wtmp / btmp share one agent, parameterised by kind.
            record_kind = 'wtmp'
            if 'utmp' in file_path:
                record_kind = 'utmp'
            elif 'btmp' in file_path:
                record_kind = 'btmp'
            UWBTmpAgent(lin_agent, record_kind).run()
def predict(self, blob):
    """Run the cached classifier over *blob*.

    Lazily loads the pickled model into the module-level
    ``classifier_model`` on first use.

    :param blob: path of the file whose contents should be classified
    :return: predictions array as produced by the classifier
    """
    global classifier_model
    if not classifier_model:
        filename = '../resources/finalized_model.joblib.pkl'
        classifier_model = joblib.load(filename)
        log.info('Model loaded')
    start = datetime.datetime.now()  # renamed from `time` (module shadow)
    log.info("Starting prediction")
    examples = []
    # NOTE(review): the model file itself is included in the example
    # list - presumably intentional, but worth confirming.
    for file_ in (blob, '../resources/finalized_model.joblib.pkl'):
        # Idiom fix: the original used `if is_binary(...): pass else:`;
        # only readable text is fed to the model.
        if not is_binary(file_):
            with open(file_, "r") as src_file_:
                examples.append(src_file_.read())
    predict_examples = classifier_model.predict(examples)
    log.info("Finished with prediction within {} ".format(
        datetime.datetime.now() - start))
    log.info("predict_examples {} ".format(predict_examples))
    return predict_examples
def file_md5(fname):
    """Return the (md5 hexdigest, md5 digest) pair of a file, or
    (None, None) when it does not exist.
    """
    if not os.path.exists(fname):
        return (None, None)
    digest = hashlib.md5()
    # when dealing with large collections of binary files, the is_binary
    # call becomes a bottleneck; checking the extension first avoids it
    # when possible.
    ext = fname.split('.')[-1]
    binary = True if ext in BINARY_FILE_EXTENSIONS else is_binary(fname)
    with open(fname, 'rb') as fobj:
        while True:
            data = fobj.read(LOCAL_CHUNK_SIZE)
            if not data:
                break
            # Text files are normalised to Unix line endings before
            # hashing so the digest is platform independent.
            digest.update(data if binary else dos2unix(data))
    return (digest.hexdigest(), digest.digest())
def licence_check(licence_ext, licence_ignore, project, project_dir):
    """Perform basic checks for the presence of licence strings.

    Walks *project_dir* (pruning ignore_dirs) and, for every non-binary
    file matching *licence_ext* that is not in *licence_ignore*, logs
    whether a licence string is present; missing headers are also
    appended to the project's gate report.
    """
    # Note: Hardcoded use of 'copyright' & 'spdx' is the result of a
    # decision made at 2017 plugfest to limit searches to just these
    # two strings.
    patterns = ['copyright', 'spdx',
                'http://creativecommons.org/licenses/by/4.0']
    for root, dirs, files in os.walk(project_dir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for name in files:  # renamed from `file` (shadowed builtin)
            if name.endswith(tuple(licence_ext)) \
                    and name not in licence_ignore:
                full_path = os.path.join(root, name)
                if not is_binary(full_path):
                    # Bug fix: the original opened the file without ever
                    # closing it, leaking a handle per checked file.
                    with open(full_path, 'r') as fo:
                        content = fo.read()
                    if any(i in content.lower() for i in patterns):
                        logger.info('Licence string present: %s', full_path)
                    else:
                        logger.error('Licence header missing: %s', full_path)
                        with open(reports_dir + "licence-" + project
                                  + ".log", "a") as gate_report:
                            gate_report.write(
                                'Licence header missing: {0}\n'.format(
                                    full_path))
def process_state(self, state):
    """Attach the content of a local file to *state*.

    Only acts on file:// URLs; skips entries flagged as unchanged,
    files above the configured size limit, excluded paths and binary
    files. Read errors are recorded in state['content_error'].

    :param state: dict describing the file (at least 'url')
    :return: the (possibly updated) state dict
    """
    if str(state['url']).startswith("file://"):
        path = state['url'][7:]  # strip the "file://" scheme prefix
    else:
        return state
    # Any truthy assume_nochange flag skips the read.
    if state.get('assume_nochange'):
        return state
    if state.get('size') is not None:
        if int(state['size']) > self.size_limit:
            self.app.log.debug("%s is above size limit - skipping" % (path))
            return state
    # filter out excludes
    for pat in self.excludepats:
        if pat.match(path):
            self.app.log.debug("%s excluded by %s" % (path, pat))
            return state
    try:
        if is_binary(path):
            self.app.log.debug("%s seems to be binary - skipping" % (path))
            return state
        with open(path, mode='rb') as file:
            state['content'] = file.read().decode('utf-8', 'replace')
        #self.app.log.debug("reading conffile contents: %s" % (path))
    except Exception:
        # fixed: narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; the error is kept on the state.
        state['content_error'] = sys.exc_info()[1]
    return state
def grep(dir, str, recursive=False, ignorecase=False):
    """Search *dir* for entries whose name or contents contain *str*.

    Prints matching file names and matching lines; descends into
    subdirectories when *recursive* is True. Always returns 0.
    """
    for entry in os.listdir(dir):
        name_to_check = entry
        if ignorecase:
            name_to_check = entry.lower()
            str = str.lower()
        full_path = os.path.join(dir, entry)
        if str in name_to_check:
            print("Filename matches template:", full_path)
        if os.path.isdir(full_path):
            if recursive:
                grep(full_path, str, recursive, ignorecase)
            continue
        # Only scan contents when we can rule out binary data (the
        # binaryornot check is optional).
        if binaryornot_installed and is_binary(full_path):
            continue
        with open(full_path, 'r') as f:
            for line_num, line in enumerate(f, start=1):
                candidate = line.lower() if ignorecase else line
                if str in candidate:
                    print("Line number {num} in file {path} matches "
                          "template: {line}".format(num=line_num,
                                                    path=full_path,
                                                    line=line.strip()))
    return 0
def download_item(item):
    """ Download a given pair of unprocessed and processed files

    :param item: (url, output_dir, overwrite) tuple
    :return: ``not is_binary(downloaded file)`` after a fresh download
        (True when the download produced a non-binary file that was
        deleted), False when the target already existed.
        NOTE(review): the polarity looks inverted for a "success" flag -
        confirm against callers before relying on it.
    """
    url, output_dir, overwrite = item
    output_filepath = os.path.join(output_dir, os.path.basename(url))
    # Derive the .wav sibling by stripping the 4-char extension
    # (assumes extensions like '.mp3' - TODO confirm).
    output_filepath_wav = os.path.join(output_dir,
                                       os.path.basename(url)[0:-4] + '.wav')
    ib = True
    extra = ' >/dev/null 2>&1'  # silence the shelled-out commands
    if not os.path.exists(output_filepath) or overwrite:
        # Download the file
        # NOTE(review): paths/URLs are interpolated unquoted into shell
        # commands; names with spaces or metacharacters will break.
        cmd = 'wget -O ' + output_filepath + ' ' + url
        ret = os.system(cmd)
        ib = is_binary(output_filepath)
        if not ib:
            # Non-binary content means the download failed (e.g. an
            # HTML error page): remove it.
            print("BAD", output_filepath)
            cmd = 'rm ' + output_filepath + extra
            ret = os.system(cmd)
        else:
            if '.mp3' in output_filepath:
                # Convert to wav using sox
                cmd = 'sox -v 0.99 ' + output_filepath + ' ' + output_filepath_wav + extra
                ret = os.system(cmd)
                cmd = 'rm ' + output_filepath + extra
                ret = os.system(cmd)
        return not ib
    else:
        return False
def file_md5(fname):
    """Return the (md5 hexdigest, md5 digest) pair of a file, or
    (None, None) when it does not exist.

    Text files are normalised to Unix line endings before hashing so
    the digest is platform independent.
    """
    if not os.path.exists(fname):
        return (None, None)
    md5sum = hashlib.md5()
    binary = is_binary(fname)
    with open(fname, "rb" if binary else "r") as fh:
        while True:
            block = fh.read(LOCAL_CHUNK_SIZE)
            if not block:
                break
            if binary:
                payload = block
            else:
                # On Python 2 text-mode reads yield bytes: decode before
                # normalising, then re-encode for hashing.
                if sys.version_info[0] == 2:
                    block = block.decode('utf-8')
                payload = dos2unix(block).encode('utf-8')
            md5sum.update(payload)
    return (md5sum.hexdigest(), md5sum.digest())
def get_contents(side):
    """Return the plain-text contents of the file on one side of the diff.

    :param side: 'a' or 'b'
    :return: a Response with the file contents (or a placeholder line
        for binary files), or an error payload
    """
    if side not in ('a', 'b'):
        return error('invalid side', 'Side must be "a" or "b", got %s' % side)
    # TODO: switch to index? might be simpler
    path = request.form.get('path', '')
    if not path:
        return error('incomplete', 'Incomplete request (need path)')
    idx = diff.find_diff_index(DIFF, side, path)
    if idx is None:
        return error('not found', 'Invalid path on side %s: %s' % (side, path))
    d = DIFF[idx]
    abs_path = d.a_path if side == 'a' else d.b_path
    try:
        if is_binary(abs_path):
            # Don't ship binary blobs; report the size instead.
            size = os.path.getsize(abs_path)
            contents = "Binary file (%d bytes)" % size
        else:
            # Bug fix: close the handle deterministically (the original
            # `open(...).read()` left it to the garbage collector).
            with open(abs_path) as f:
                contents = f.read()
        return Response(contents, mimetype='text/plain')
    except Exception:
        return error('read-error', 'Unable to read %s' % abs_path)
def check_paths(paths):
    """Assert that the template variable was replaced in every text file.

    :param paths: iterable of file paths to inspect
    :raises AssertionError: when RE_OBJ still matches a line
    """
    for path in paths:
        if is_binary(path):
            continue
        # Bug fix: use a context manager so each file handle is closed
        # promptly (the original leaked one handle per file).
        with open(path, 'r', encoding="latin-1") as handle:
            for line in handle:
                match = RE_OBJ.search(line)
                msg = 'variable not replaced in {}'
                assert match is None, msg.format(path)
def read_index(db, file_, file_path, update = False):
    """Index the tokens of *file_path* into the database.

    Binary files are skipped. If UTF-8 decoding fails, the encoding is
    sniffed with chardet's UniversalDetector and parsing is retried
    once.

    :param db: (sqlite connection, token dictionary) pair
    :param file_: file id/key used in the index tables
    :param file_path: path of the file to parse
    :param update: when True, suppress the "is binary" message
    :return: the encoding that was used (or "UTF-8" when skipped)
    """
    global _index_count
    con = db[0]
    token_dict = db[1]
    encoding = "UTF-8"
    if is_binary(file_path):
        if not update:
            print("%s: is binary, skipping" % (file_path,))
    else:
        # Batch mode: exit the process once _args.batch files have been
        # indexed in this run.
        if _args.batch > 0:
            _index_count += 1  # noqa
            if _index_count > _args.batch:
                con.close()
                sys.exit(0)
        try:
            inserts = set()
            insert_count = parse_file(db, file_, file_path, inserts,
                                      encoding)
        except UnicodeDecodeError as e:
            # UTF-8 failed: detect the real encoding and retry once.
            try:
                with open(file_path, "rb") as f:
                    detector = UniversalDetector()
                    for line in f.readlines():
                        detector.feed(line)
                        if detector.done:
                            break
                    detector.close()
                encoding = detector.result['encoding']
                if not encoding:
                    # Detection failed: surface the original error.
                    raise e
                inserts = set()
                insert_count = parse_file(
                    db, file_, file_path, inserts, encoding
                )
            except UnicodeDecodeError:
                print("%s: decoding failed %s" % (
                    file_path, encoding
                ))
                inserts.clear()
                return encoding
        # Each distinct token gets an extra (token, file_, -1) marker row.
        tokens = set([x[0] for x in inserts])
        for token in tokens:
            inserts.add((token, file_, -1))
        with con:
            # Commit new tokens, then replace this file's index rows
            # inside one transaction.
            new = token_dict.commit()
            con.execute(_clear_existing_index, (file_,))
            con.executemany(_insert_index, inserts)
        unique_inserts = len(inserts)
        # The tiny epsilon avoids division by zero for empty files.
        print("%s: indexed %s/%s (%.3f) new: %s %s" % (
            file_path, unique_inserts, insert_count,
            float(unique_inserts) / (insert_count + 0.0000000001),
            new, encoding
        ))
        clear_cache(db)
    return encoding
def run(args, out=sys.stdout) -> int:
    """Add headers to files.

    Validates the CLI arguments, resolves the template and the
    copyright year, builds the SPDX info and adds a header to every
    given path (using a sidecar '<name>.license' file for binaries or
    when --explicit-license is set).

    :param args: parsed argparse namespace (copyright, license, year,
        exclude_year, template, style, explicit_license, path, parser)
    :param out: stream handed through to _add_header_to_file
    :return: 0 on success, 1 when at least one file failed
    """
    if not any((args.copyright, args.license)):
        args.parser.error(_("option --copyright or --license is required"))
    if args.exclude_year and args.year:
        args.parser.error(
            _("option --exclude-year and --year are mutually exclusive"))
    paths = [_determine_license_path(path) for path in args.path]
    # First loop to verify before proceeding
    if args.style is None:
        _verify_paths_supported(paths, args.parser)
    project = create_project()
    template = None
    commented = False
    if args.template:
        try:
            template = _find_template(project, args.template)
        except TemplateNotFound:
            args.parser.error(
                _("template {template} could not be found").format(
                    template=args.template))
        # Templates named '*.commented.*' render pre-commented headers.
        if ".commented" in Path(template.name).suffixes:
            commented = True
    year = None
    if not args.exclude_year:
        # Explicit --year wins; otherwise use the current year.
        if args.year:
            year = args.year
        else:
            year = datetime.date.today().year
    expressions = set(args.license) if args.license is not None else set()
    copyright_lines = (set(
        make_copyright_line(x, year=year) for x in args.copyright)
                       if args.copyright is not None else set())
    spdx_info = SpdxInfo(expressions, copyright_lines)
    result = 0
    for path in paths:
        binary = is_binary(str(path))
        if binary or args.explicit_license:
            # Binaries cannot carry a comment header: write the header
            # into a sidecar '<name>.license' file instead.
            new_path = f"{path}.license"
            if binary:
                _LOGGER.info(
                    _("'{path}' is a binary, therefore using '{new_path}' "
                      "for the header").format(path=path, new_path=new_path))
            path = Path(new_path)
            path.touch()
        result += _add_header_to_file(path, spdx_info, template, commented,
                                      args.style, out)
    # Collapse the per-file error count into a 0/1 exit code.
    return min(result, 1)
def main(args=None):
    """Main program entry point,

    Updates the copyright year of every versioned (non-binary) file to
    the year of its last git commit.

    :param args: list of string command line arguments
    :type args: list[str]
    :return: program exit code or error string
    :rtype: int|str
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('-e', '--exclude', metavar='PATH', action='append',
                    help="Exclude given path or filename (can be given more than once)")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="Report which files are processed and updatzed to stdout")
    ap.add_argument('files', nargs='*',
                    help="Only process given files (default: all versioned files)")
    args = ap.parse_args(args)

    def report(msg):
        if args.verbose:
            print(msg)

    # Collect every versioned file with the date of its last commit.
    git_files = []
    for path in check_output(['git', 'ls-files'] + args.files).decode().splitlines():
        date = parse(check_output(
            ['git', '--no-pager', 'log', '-1', '--date=iso',
             '--pretty=format:%ai', '--', path]
        ).decode())
        git_files.append((date, path))

    for date, path in filter_excluded(git_files, args.exclude or [],
                                      itemgetter(1)):
        if is_binary(path):
            report("Ignoring binary file '%s'." % path)
            continue
        repl_func = partial(replace_year, date.year)
        try:
            with open(path) as fp:
                file_contents = fp.read()
        except OSError as exc:
            print("Could not read file '%s': %s" % (path, exc),
                  file=sys.stderr)
            # Bug fix: without this `continue` the code below raised
            # NameError, referencing `file_contents` before assignment.
            continue
        match = COPYRIGHT_PTN.search(file_contents)
        if match:
            new_contents, num_subst = COPYRIGHT_PTN.subn(repl_func,
                                                         file_contents)
            if num_subst > 0 and file_contents.strip() != new_contents.strip():
                report("Updating copyright year in '%s' to %s."
                       % (path, date.year))
                try:
                    with open(path, 'w') as fp:
                        fp.write(new_contents)
                except OSError as exc:
                    print("Could not update file '%s': %s" % (path, exc),
                          file=sys.stderr)
            else:
                report("Copyright statements(s) in '%s' already up to date."
                       % path)
        else:
            report("No copyright statement found in '%s'." % path)
def generate_file(project_dir, infile, context, env):
    """Render one template file into the generated project.

    1. Render the filename of infile as the name of outfile.
    2. Deal with infile appropriately:

        a. If infile is a binary file, copy it over without rendering.
        b. If infile is a text file, render its contents and write the
           rendered infile to outfile.

    .. precondition::

        When calling `generate_file()`, the root template dir must be the
        current working directory. Using `utils.work_in()` is the
        recommended way to perform this directory change.

    :param project_dir: Absolute path to the resulting generated project.
    :param infile: Input file to generate the file from. Relative to the
        root template dir.
    :param context: Dict for populating the cookiecutter's variables.
    :param env: Jinja2 template execution environment.
    """
    logging.debug("Generating file {0}".format(infile))

    # Render the path to the output file (not including the root
    # project dir).
    outfile = os.path.join(project_dir, Template(infile).render(**context))
    logging.debug("outfile is {0}".format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Binary files are copied verbatim - never rendered.
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
    else:
        # Jinja's get_template() wants forward slashes, even on Windows
        # (a by-design Jinja issue).
        template_name = infile.replace(os.path.sep, '/')
        try:
            tmpl = env.get_template(template_name)
        except TemplateSyntaxError as exception:
            # Disable translated so that printed exception contains verbose
            # information about syntax error location
            exception.translated = False
            raise
        logging.debug("Writing {0}".format(outfile))
        with unicode_open(outfile, 'w') as fh:
            fh.write(tmpl.render(**context))
            fh.write('\n')

    # Apply file permissions to output file
    shutil.copymode(infile, outfile)
def readFolder(directory, multipleFileHeaders=False):
    """Dump a process-ready format from *directory*.

    Returns a raw numpy array of vectors, the file header and odt data
    for 2d plotting.

    :param directory: directory with .omf/.ovf files and optionally a
        plot (.odt/.txt) file
    :param multipleFileHeaders: unused; kept for interface compatibility
    :return: (rawVectorData, header, plot_data, stages, trigger_list)
    :raises ValueError: on missing vector files, plot-file conflicts or
        an odt with fewer stages than files
    """
    files_in_directory, ext = MultiprocessingParse.guess_file_type(directory)
    ext_files = glob.glob(os.path.join(directory, '*' + ext[0]))
    if not ext_files:
        # Guard: the original raised IndexError here for empty dirs.
        raise ValueError("no .omf or .ovf file has been found")
    # Bug fix: glob already returns paths prefixed with `directory`, so
    # joining again duplicated the prefix for relative directories.
    test_file = ext_files[0]
    stages = len(ext_files)
    plot_file = glob.glob(os.path.join(directory, ext[1]))
    # look for .odt or .txt in current directory
    if len(plot_file) > 1:
        # TODO error window
        raise ValueError("plot file extension conflict (too many)")
    elif not plot_file:
        plot_data = None
        plot_file = None
    # NOTE: this should recognize both .omf and .ovf files
    trigger_list = None
    if plot_file is not None:
        plot_data, stages0 = getPlotData(plot_file[0])
        print(stages0, stages)
        if stages0 != stages:
            if stages0 > stages:
                # More odt stages than files: build a trigger list.
                trigger_list = MultiprocessingParse.compose_trigger_list(
                    ext_files, plot_data)
                stages = len(trigger_list)
                print(trigger_list)
                print("TRIGGER LIST : {}, {}".format(stages,
                                                     len(trigger_list)))
            else:
                raise ValueError("Odt cannot have fewer stages that files")
    else:
        plot_data = None
    if not is_binary(test_file):
        rawVectorData = MultiprocessingParse.readText(files_in_directory)
        # virtually any of the matched files will do as a header source
        header = getFileHeader(ext_files[0])
    else:
        headers, rawVectorData = MultiprocessingParse.readBinary(
            files_in_directory)
        header = headers[0]
    if not header:
        raise ValueError("no .omf or .ovf file has been found")
    return rawVectorData, header, plot_data, stages, trigger_list
def generate_file(project_dir, infile, context, env):
    """Render one template file into the generated project.

    1. Render the filename of infile as the name of outfile.
    2. Deal with infile appropriately:

        a. If infile is a binary file, copy it over without rendering.
        b. If infile is a text file, render its contents and write the
           rendered infile to outfile.

    Precondition: When calling `generate_file()`, the root template dir
    must be the current working directory. Using `utils.work_in()` is
    the recommended way to perform this directory change.

    :param project_dir: Absolute path to the resulting generated project.
    :param infile: Input file to generate the file from. Relative to the
        root template dir.
    :param context: Dict for populating the cookiecutter's variables.
    :param env: Jinja2 template execution environment.
    """
    logging.debug('Generating file {0}'.format(infile))

    # Render the path to the output file (not including the root
    # project dir).
    name_template = Template(infile)
    outfile = os.path.join(project_dir, name_template.render(**context))
    logging.debug('outfile is {0}'.format(outfile))

    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        # Just copy over binary files. Don't render.
        logging.debug('Copying binary {0} to {1} without rendering'.format(
            infile, outfile))
        shutil.copyfile(infile, outfile)
        # Apply file permissions to output file
        shutil.copymode(infile, outfile)
        return

    # Force fwd slashes on Windows for get_template
    # This is a by-design Jinja issue
    template_name = infile.replace(os.path.sep, '/')
    try:
        tmpl = env.get_template(template_name)
    except TemplateSyntaxError as exception:
        # Disable translated so that printed exception contains verbose
        # information about syntax error location
        exception.translated = False
        raise
    logging.debug('Writing {0}'.format(outfile))
    with io.open(outfile, 'w', encoding='utf-8') as fh:
        fh.write(tmpl.render(**context))

    # Apply file permissions to output file
    shutil.copymode(infile, outfile)
def _verify_paths_comment_style(paths: List[Path], parser: ArgumentParser):
    """Abort via *parser* for any non-binary path that has no
    recognised comment style.
    """
    for path in paths:
        # TODO: This check is duplicated.
        if _get_comment_style(path) is not None:
            continue
        if is_binary(str(path)):
            continue
        parser.error(
            _("'{path}' does not have a recognised file extension,"
              " please use --style, --explicit-license or"
              " --skip-unrecognised").format(path=path))
def process_file(src, dest, word_option, path,  # pylint: disable=R0913
                 diff, text_only):
    """Rename in a file.

    Substitutes src->dest in the file contents (and, unless *text_only*,
    also in the file path). With *diff* set, only a unified diff is
    printed and nothing is written.
    """
    if is_binary(path):
        return
    # if --text-only requested, do not perform substitutions in filepath
    if not text_only:
        new_path = edit_line(src, dest, path, word_option)
    else:
        new_path = path
    try:
        with io.open(path, 'r', encoding='utf-8') as in_file:
            in_lines = in_file.readlines()
    except IOError as e:
        # Bug fix: the messages used '{1}' with no '{0}', so the file
        # name was silently dropped; logging.warn is also deprecated.
        logging.warning('could not read file {0}, error message: {1}'
                        .format(path, e))
        return
    except UnicodeDecodeError as e:
        logging.debug('could not read file {0}, error message: {1}'
                      .format(path, e))
        return
    # perform substitions in file contents
    out_lines = list(edit_text(src, dest, in_lines, word_option))
    # only output diff to stdout, do not write anything to file (if
    # requested by --diff)
    if diff:
        diffs = difflib.unified_diff(in_lines, out_lines,
                                     fromfile=path, tofile=new_path)
        for line in diffs:
            sys.stdout.write(line)
        return
    try:
        with io.open(new_path, 'w', encoding='utf-8') as out_file:
            out_file.writelines(out_lines)
    except IOError as e:
        logging.warning('could not write file {0}, error message: {1}'
                        .format(path, e))
    if new_path != path:
        try:
            # explicitly mkdir missing directories (due to possible
            # subst. in filepath)
            new_dir = os.path.dirname(new_path)
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            shutil.copymode(path, new_path)
            os.unlink(path)
        except OSError as e:
            logging.warning('could not delete file {0}, error message: {1}'
                            .format(path, e))
    return
def get_cached_pdf(headers, item, decoded):
    """Return a base64-encoded PDF preview for *item*, using a per-user
    on-disk cache.

    Fresh cache hits are served directly; otherwise the file is fetched
    from the remote service, stored under uploads/<username>/ and
    converted to PDF when it is not one already.

    :param headers: auth headers forwarded to SendRequest
    :param item: dict with at least 'path' and 'rootName'
    :param decoded: decoded token dict; 'username' selects the cache
    :return: base64 bytes of the PDF, a status string when no preview is
        available, or {'isNotReadable': 1} for unsupported binaries
    """
    file_path = item['path']
    # Flatten the remote path into a single cache file name.
    file_name = file_path.replace('/', '__') \
        + pathlib.Path(item['rootName']).suffix
    file_name_convert = file_name + '.pdf'
    cached_pdf[decoded['username']] = cached_pdf.get(decoded['username']) or []
    cached_item = next(
        (item for item in cached_pdf[decoded['username']]
         if item['path'] == file_path), None)
    upload_folder_path = ROOT_DIR + '/uploads/' + decoded['username']
    if not os.path.exists(upload_folder_path):
        os.makedirs(upload_folder_path)
    path_file_converted = upload_folder_path + '/' + file_name_convert
    path_file_download = upload_folder_path + '/' + file_name
    # Serve from cache while the entry is younger than CACHE_LIFE_TIME.
    if cached_item and (datetime.now()
                        - cached_item['ts']).total_seconds() <= CACHE_LIFE_TIME:
        print((datetime.now() - cached_item['ts']).total_seconds())
        cached_item['ts'] = datetime.now()
        try:
            # Probe whether the raw download is itself a readable PDF.
            PyPDF2.PdfFileReader(open(path_file_download, "rb"))
        except PyPDF2.utils.PdfReadError:
            return base64.b64encode(open(path_file_converted, "rb").read())
        else:
            return base64.b64encode(open(path_file_download, "rb").read())
    cached_item = {}
    # Cache miss/stale: ask the service for a download URL and fetch it.
    response = SendRequest(headers, file_path)
    url = response.json()['url']
    filedata = requests.get(url)
    if filedata.status_code == 404:
        return "NO FILE PREVIEW"
    if filedata.status_code == 200:
        with open(path_file_download, 'wb') as f:
            f.write(filedata.content)
        # Unconvertible binary formats get a "not readable" marker.
        # NOTE(review): rootName is lower-cased, so the upper-case
        # entries in this tuple are redundant and a lower-case 'txt' is
        # missing ('TXT' can never match) - confirm intended behavior.
        if is_binary(path_file_download) and not item['rootName'].lower().endswith(
                ('jpg', 'JPG', 'png', 'PNG', 'jpeg', 'JPEG', 'gif', 'GIF',
                 'bmp', 'BMP', 'svg', 'SVG', 'pdf', 'las', 'asc', 'LAS',
                 'TXT', 'ASC', 'csv', 'CSV', 'xlsx', 'XLSX', 'XLS', 'xls',
                 'ppt', 'PPT', 'pptx', 'PPTX', 'doc', 'DOC', 'docx',
                 'DOCX', 'mpp')):
            return {'isNotReadable': 1}
        try:
            PyPDF2.PdfFileReader(open(path_file_download, "rb"))
        except PyPDF2.utils.PdfReadError:
            # Not a valid PDF yet: convert it.
            # if path_file_download.lower().endswith(('xlsx', 'xls', 'csv')):
            #     ConvertFileExcel(path_file_download)
            # else:
            ConvertFile(path_file_download)
        else:
            # Already a PDF - serve the downloaded file directly.
            path_file_converted = path_file_download
        cached_item['path'] = file_path
        cached_item['ts'] = datetime.now()
        cached_pdf[decoded['username']].append(cached_item)
        try:
            readFile = open(path_file_converted, "rb")
        except IOError:
            return "NO PDF FILE TO PREVIEW"
        else:
            return base64.b64encode(open(path_file_converted, "rb").read())
def _verify_paths_supported(paths, parser):
    """Abort via *parser* when a non-binary path has no recognised
    comment style (its suffix is missing from COMMENT_STYLE_MAP).
    """
    for path in paths:
        try:
            COMMENT_STYLE_MAP[path.suffix]
        except KeyError:
            # TODO: This check is duplicated.
            if not is_binary(str(path)):
                # Bug/consistency fix: format AFTER translating (as the
                # sibling check does) - formatting first produces a
                # string that can never match a gettext catalog entry.
                parser.error(
                    _("'{}' does not have a recognised file extension, "
                      "please use --style").format(path))
def check_password_replaced(paths):
    """Assert that the POSTGRES_PASSWORD placeholder was substituted in
    every non-binary file of *paths*.

    :raises AssertionError: if the placeholder still occurs
    """
    PATTERN = 'POSTGRES_PASSWORD!!!'
    RE_OBJ = re.compile(PATTERN)
    for path in paths:
        if not is_binary(path):
            # Bug fix: use a context manager so each file handle is
            # closed promptly (the original leaked one per file).
            with open(path, 'r') as handle:
                for line in handle:
                    match = RE_OBJ.search(line)
                    msg = 'password variable not replaced in {}'
                    assert match is None, msg.format(path)
def check_paths(paths):
    """Method to check all paths have correct substitutions."""
    # Assert that no match is found in any of the files
    for path in paths:
        if is_binary(path):
            continue
        # Bug fix: use a context manager so each file handle is closed
        # promptly (the original leaked one handle per file).
        with open(path, "r") as handle:
            for line in handle:
                match = RE_OBJ.search(line)
                assert match is None, \
                    f"cookiecutter variable not replaced in {path}"
def main():
    """Append a trailing newline to every non-binary file under the CWD that lacks one."""
    root_path = os.getcwd()
    for dirpath, dirnames, filenames in os.walk(root_path):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            if not is_binary(path):
                # BUGFIX: handle is now closed via `with`.
                with open(path, 'a+b') as fd:
                    try:
                        fd.seek(-1, os.SEEK_END)
                    except OSError:
                        # BUGFIX: seeking before the start of an empty file
                        # raises; skip it (nothing to terminate).
                        continue
                    # BUGFIX: the handle is binary, so compare/write bytes —
                    # the original compared read(1) to the str '\n' (always
                    # False on Python 3) and then wrote a str, a TypeError.
                    if fd.read(1) != b'\n':
                        fd.seek(0, os.SEEK_END)
                        fd.write(b'\n')
def generate_html(templates_dir, output_dir, context=None, unexpanded_templates=()):
    """
    Renders the HTML templates from `templates_dir`, and writes them to
    `output_dir`.

    :param templates_dir: The Complexity templates directory, e.g.
        `project/templates/`.
    :paramtype templates_dir: directory
    :param output_dir: The Complexity output directory, e.g. `www/`.
    :paramtype output_dir: directory
    :param context: Jinja2 context that holds template variables. See
        http://jinja.pocoo.org/docs/api/#the-context
    :param unexpanded_templates: template paths that are written out without
        Jinja expansion.
    """
    logging.debug('Templates dir is {0}'.format(templates_dir))
    if not os.path.exists(templates_dir):
        raise MissingTemplateDirException(
            'Your project is missing a templates/ directory containing your \
HTML templates.'
        )

    context = context or {}
    env = Environment()
    # os.chdir(templates_dir)
    env.loader = FileSystemLoader(templates_dir)

    # Create the output dir if it doesn't already exist
    make_sure_path_exists(output_dir)

    for root, dirs, files in os.walk(templates_dir):
        for f in files:
            # print(f)
            # Template path relative to templates_dir, as required by
            # env.get_template downstream.
            template_filepath = os.path.relpath(
                os.path.join(root, f), templates_dir
            )
            force_unexpanded = template_filepath in unexpanded_templates
            logging.debug('Is {0} in {1}? {2}'.format(
                template_filepath, unexpanded_templates, force_unexpanded
            ))
            if is_binary(os.path.join(templates_dir, template_filepath)):
                # NOTE(review): binary files are skipped entirely — they are
                # NOT copied to output_dir. Confirm this is intended.
                print('Non-text file found: {0}. Skipping.'.
                      format(template_filepath))
            else:
                outfile = get_output_filename(template_filepath, output_dir,
                                              force_unexpanded)
                print('Copying {0} to {1}'.format(template_filepath, outfile))
                generate_html_file(template_filepath, output_dir, env,
                                   context, force_unexpanded)
def mime_type(self):
    """Determine mime type of the file content (lazily computed and cached).

    Falls back to ``DEFAULT_MIME_TYPE_BIN`` / ``DEFAULT_MIME_TYPE_TEXT``
    when ``guess_type`` cannot determine a type from the path.

    :return: mime-type string, cached in ``self._mime_type``.
    """
    if self._mime_type is None:
        # Renamed local (was `type`) so it no longer shadows the builtin.
        mtype = guess_type(self.abspath, False)[0]
        if mtype is None:
            if is_binary(self.abspath):
                mtype = DEFAULT_MIME_TYPE_BIN
            else:
                mtype = DEFAULT_MIME_TYPE_TEXT
        self._mime_type = mtype
    return self._mime_type
def check_paths(paths):
    """Method to check all paths have correct substitutions,
    used by other tests cases

    :param paths: iterable of file paths; binary files are skipped.
    :raises AssertionError: if the module-level RE_OBJ pattern matches
        any line of any text file.
    """
    # Assert that no match is found in any of the files
    for path in paths:
        if is_binary(path):
            continue
        # BUGFIX: the file handle was previously never closed.
        with open(path, 'r') as fh:
            for line in fh:
                match = RE_OBJ.search(line)
                msg = "cookiecutter variable not replaced in {}"
                assert match is None, msg.format(path)
def ensure_newlines():
    """Append a trailing newline to every non-binary file under the CWD that lacks one."""
    root_path = os.getcwd()
    for dirpath, _, filenames in os.walk(root_path):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            if not is_binary(path):
                # BUGFIX: handle is now closed via `with`.
                with open(path, 'a+b') as fd:
                    try:
                        fd.seek(-1, os.SEEK_END)
                        # BUGFIX: binary handle — compare/write bytes. The
                        # original compared to the str '\n' (always False on
                        # Python 3) and wrote a str, raising TypeError.
                        if fd.read(1) != b'\n':
                            fd.seek(0, os.SEEK_END)
                            fd.write(b'\n')
                    except IOError:
                        # This was an empty file, so do nothing
                        pass
def generate_file(project_dir, infile, context, env):
    """
    1. Render the contents of infile.
    2. Render the filename of infile as the name of outfile.
    3. Write the rendered infile to outfile.

    :param project_dir: root directory the rendered file is written under.
    :param infile: Input file to generate the file from.
    :param context: Jinja2 context dict used for both content and path rendering.
    :param env: Jinja2 Environment used to look up *infile* as a template.
    """
    logging.debug("Generating file {0}".format(infile))

    # Render the intermediary path to the output file (not including the root
    # project dir nor the filename itself)
    outdir_tmpl = Template(os.path.dirname(infile))
    outdir = outdir_tmpl.render(**context)

    # Write the file to the corresponding place
    fname = os.path.basename(infile)  # input/output filename
    outfile = os.path.join(project_dir, outdir, fname)
    logging.debug("outfile is {0}".format(outfile))

    # Just copy over binary files. Don't render.
    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
    else:
        # Force fwd slashes on Windows for get_template
        # This is a by-design Jinja issue
        infile_fwd_slashes = infile.replace(os.path.sep, '/')

        # Render the file
        try:
            tmpl = env.get_template(infile_fwd_slashes)
        except TemplateSyntaxError as exception:
            # Disable translated so that printed exception contains verbose
            # information about syntax error location
            exception.translated = False
            raise
        rendered_file = tmpl.render(**context)

        # Render the output filename before writing.
        # NOTE(review): outfile's directory part was already rendered via
        # outdir_tmpl, so template vars in the path get a second pass here.
        name_tmpl = Template(outfile)
        rendered_name = name_tmpl.render(**context)
        logging.debug("Writing {0}".format(rendered_name))

        with unicode_open(rendered_name, 'w') as fh:
            fh.write(rendered_file)
def get_file_encoding(path):
    """Best-effort detection of a text encoding able to decode *path*.

    Returns 'binary' for binary files, the first candidate encoding that
    decodes the whole file, or None when none of the candidates work.
    Candidates come from the module-level ``template`` config, defaulting
    to utf-8 then cp1252.
    """
    if is_binary(path):
        return 'binary'
    try:
        candidates = template['encodings']
    except KeyError:
        candidates = ['utf-8', 'cp1252']
    for candidate in candidates:
        try:
            with open(path, encoding=candidate) as handle:
                handle.read()
        except UnicodeDecodeError:
            continue
        return candidate
    return None
def do(process_parameters, settings, filename):
    """Email *filename* as an attachment over SMTP (STARTTLS).

    Subject comes from process_parameters['email_subject_line'] with
    %datetime% / %filename% placeholder substitution, or defaults to
    "<name> Attached". Retries on any exception; re-raises once the retry
    counter reaches 10 (i.e. on the 11th consecutive failure).
    """
    file_pass = False
    counter = 0
    while not file_pass:
        try:
            from_address = settings['email_address']
            to_address = process_parameters['email_to']
            # sendmail() needs a list of recipients; the header keeps the
            # original comma-separated string.
            to_address_list = to_address.split(", ")
            msg = MIMEMultipart()
            filename_no_path = os.path.basename(filename)
            if process_parameters['email_subject_line'] != "":
                date_time = str(time.ctime())
                subject_line_constructor = process_parameters['email_subject_line']
                msg['Subject'] = subject_line_constructor.replace("%datetime%", date_time).replace("%filename%", filename_no_path)
            else:
                msg['Subject'] = str(filename_no_path) + " Attached"
            msg['From'] = from_address
            msg['To'] = to_address
            body = str(filename_no_path) + " Attached"
            msg.attach(MIMEText(body, 'plain'))
            with open(filename, 'rb') as attachment:
                part = MIMEBase('application', 'octet-stream; name="%s"' % filename_no_path)
                part.set_payload(attachment.read())
            # Only binary payloads are base64-encoded; text files keep their
            # raw payload.  NOTE(review): non-ASCII text attachments may also
            # need an encoding — confirm.
            if is_binary(filename):
                encoders.encode_base64(part)
            part.add_header('X-Attachment-Id', '1')
            part.add_header('Content-Disposition', 'attachment; filename="%s"' % filename_no_path)
            msg.attach(part)
            server = smtplib.SMTP(str(settings['email_smtp_server']), str(settings['smtp_port']))
            server.starttls()
            server.login(from_address, settings['email_password'])
            server.sendmail(from_address, to_address_list, msg.as_string())
            server.close()
            file_pass = True
        except Exception:
            # NOTE(review): retries immediately with no backoff delay.
            if counter == 10:
                raise
            counter += 1
def get_context_data(self, **kwargs):
    """Add a short preview of the uploaded FASTA file to the template context.

    Binary uploads get a warning string; text uploads get their first six
    lines under the 'file_head' context key.
    """
    context = super(SampleDetail, self).get_context_data(**kwargs)
    # Renamed local (was `object`) so it no longer shadows the builtin.
    obj = kwargs['object']
    file_ = obj.fasta_file.file.name
    if is_binary(file_):
        file_head = 'The uploaded file is a binary file !!'
    else:
        # First six lines of the text file (enumerate replaces the original
        # manual readline/index loop; same six lines collected).
        file_head = ''
        with open(file_) as f:
            for index, line in enumerate(f):
                if index >= 6:
                    break
                file_head += line
    context['file_head'] = file_head
    return context
def read_directory(project_directory):
    """Walk *project_directory* and collect one entry per directory.

    Each entry maps 'path' (relative to the project's parent directory) to a
    'files' list of {path, binary, content} dicts. `.git` trees are pruned,
    dotfiles and .gitignore'd files are skipped.
    """
    entries = []
    parent_directory = os.path.normpath(
        os.path.join(project_directory, os.pardir)
    )
    ignore_path = os.path.join(project_directory, '.gitignore')
    if os.path.exists(ignore_path):
        ignore_list = read_ignore(read_file(ignore_path))
    else:
        ignore_list = []
    for root, dirs, files in os.walk(project_directory):
        rel_dir = os.path.relpath(root, parent_directory)
        if '.git' in rel_dir:
            # Prune the walk below .git.
            dirs[:] = []
            continue
        entry = {
            'path': rel_dir,
            'files': [],
        }
        if ignore_list:
            files = remove_ignores(files, ignore_list)
        for name in files:
            if name.startswith('.'):
                continue
            full_path = os.path.join(root, name)
            content = read_file(full_path)
            binary_flag = check.is_binary(full_path)
            entry['files'].append({
                'path': os.path.relpath(full_path, parent_directory),
                'binary': binary_flag,
                'content': content,
            })
        entries.append(entry)
    return entries
def check_paths(self, paths):
    """
    Method to check all paths have correct substitutions,
    used by other tests cases

    :param paths: iterable of file paths; binary files are skipped.
    """
    # Construct the cookiecutter search pattern.
    # BUGFIX: raw string — the original non-raw literal relied on invalid
    # escape sequences (\s), a SyntaxWarning on modern Python.
    pattern = r"{{(\s?cookiecutter)[.](.*?)}}"
    re_obj = re.compile(pattern)

    # Assert that no match is found in any of the files
    for path in paths:
        if not is_binary(path):
            # BUGFIX: the file handle was previously never closed.
            with open(path, 'r') as fh:
                for line in fh:
                    match = re_obj.search(line)
                    self.assertIsNone(
                        match,
                        "cookiecutter variable not replaced in {}".format(path))
def worker(re_text, file_queue, result_queue, logger, timeout):
    """Queue worker: scan queued files for a regex match until *timeout*.

    :param re_text: compiled pattern matched against each (decoded) line.
    :param file_queue: queue of candidate file paths; task_done() is called
        for every item taken, even on error.
    :param result_queue: paths with at least one matching line are put here.
    :param timeout: absolute deadline as epoch seconds.
    """
    while int(time.time()) < timeout:
        if file_queue.empty() is not True:
            f_path = file_queue.get()
            try:
                if not is_binary(f_path):
                    mime = mimetypes.guess_type(f_path)[0]
                    # exclude some mime types from the search
                    if mime not in ["application/pdf", "application/rar"]:
                        with open(f_path, "rb") as fp:
                            for line in fp:
                                try:
                                    line = as_unicode(line)
                                except UnicodeDecodeError:
                                    # Fall back to chardet; MacCyrillic is
                                    # treated as windows-1251 here.
                                    charset = chardet.detect(line)
                                    if charset.get("encoding") in ["MacCyrillic"]:
                                        detected = "windows-1251"
                                    else:
                                        detected = charset.get("encoding")
                                    if detected is None:
                                        # Undecodable line: give up on file.
                                        break
                                    try:
                                        line = str(line, detected, "replace")
                                    except LookupError:
                                        # Unknown codec name: keep raw line.
                                        pass
                                if re_text.match(line) is not None:
                                    result_queue.put(f_path)
                                    # logger.debug("matched file = %s " % f_path)
                                    break
            except UnicodeDecodeError as unicode_e:
                logger.error("UnicodeDecodeError %s, %s" % (str(unicode_e), traceback.format_exc()))
            except IOError as io_e:
                logger.error("IOError %s, %s" % (str(io_e), traceback.format_exc()))
            except Exception as other_e:
                logger.error("Exception %s, %s" % (str(other_e), traceback.format_exc()))
            finally:
                file_queue.task_done()
        else:
            time.sleep(REQUEST_DELAY)
def openFile(self):
    """
    Open file

    Shows a Qt file-open dialog and, for binary selections, loads the
    dill-pickled MBD_system object into self.MBD_system.
    :return:
    """
    # PyQt4 / Python 2 code (print statement, QtCore.QString).
    file_dialog = QtGui.QFileDialog()
    filename, file_type = file_dialog.getOpenFileNameAndFilter(
        self,
        caption='Open file',
        directory=QtCore.QString(self.MBD_folder_abs_path),
        filter=self._file_types)
    filename = str(filename)
    if filename:
        if is_binary(filename):
            with open(filename, 'rb') as _file:
                print "_file =", _file
                self.MBD_system = dill.load(_file)
        else:
            # NOTE(review): text files are silently ignored — confirm intended.
            pass
def generate_file(infile, context, env):
    """
    1. Render the contents of infile.
    2. Render the filename of infile as the name of outfile.
    3. Write the rendered infile to outfile.

    :param infile: Input file to generate the file from.
    :param context: Jinja2 context dict used for both content and path rendering.
    :param env: Jinja2 Environment used to look up *infile* as a template.
    """
    logging.debug("Generating file {0}".format(infile))

    # Render the path to the output file (but don't include the filename)
    # NOTE(review): unlike the generate_file(project_dir, ...) variant, this
    # one writes next to the absolute input path.
    outdir_tmpl = Template(os.path.dirname(os.path.abspath(infile)))
    outdir = outdir_tmpl.render(**context)
    fname = os.path.basename(os.path.abspath(infile))  # input/output filename

    # Write it to the corresponding place in output_dir
    outfile = os.path.join(outdir, fname)
    logging.debug("outfile is {0}".format(outfile))

    # Just copy over binary files. Don't render.
    logging.debug("Check {0} to see if it's a binary".format(infile))
    if is_binary(infile):
        logging.debug("Copying binary {0} to {1} without rendering"
                      .format(infile, outfile))
        shutil.copyfile(infile, outfile)
    else:
        # Force fwd slashes on Windows for get_template
        # This is a by-design Jinja issue
        infile_fwd_slashes = infile.replace(os.path.sep, '/')

        # Render the file
        tmpl = env.get_template(infile_fwd_slashes)
        rendered_file = tmpl.render(**context)

        # Render the output filename before writing
        name_tmpl = Template(outfile)
        rendered_name = name_tmpl.render(**context)
        logging.debug("Writing {0}".format(rendered_name))

        with unicode_open(rendered_name, 'w') as fh:
            fh.write(rendered_file)
def identify(self, input_data):
    """Identify each input, dispatching to identify_binary / identify_text.

    :param input_data: either a list of file paths, or a single text string
        (e.g. read from stdin).
    :return: dict mapping file path (or "unknown_text") to the
        identification result.
    """
    identified = {}
    if isinstance(input_data, list):
        # identify() was passed a list of filenames.
        # (isinstance replaces the original `type(x) == list` comparison.)
        for file_path in input_data:
            if is_binary(file_path):
                identified[file_path] = self.identify_binary(file_path)
            else:
                # Read in the text file; universal newlines are the default
                # in Python 3. BUGFIX: the original "rU" mode was removed in
                # Python 3.11; the handle is now also closed via `with`.
                with open(file_path, "r") as text_input_file:
                    input_text = text_input_file.read()
                identified[file_path] = self.identify_text(input_text)
    elif isinstance(input_data, str):
        # identify() was passed a string from stdin
        identified["unknown_text"] = self.identify_text(input_data)
    else:
        # identify() was (probably) passed some useless garbage
        print("Error: identify() received unrecognized input data: %s"
              % str(input_data))
        sys.exit(1)
    return identified
def __init__(self, filename):
    # Snapshot metadata (abspath, mtime, size) and an md5 checksum of
    # *filename*; the full content is kept in memory only when the
    # class-level MAX_SIZE is positive.
    binary = is_binary(filename)
    # mimetypes.guess_type()[1] is a compression hint ('gzip', ...), not a
    # text charset.
    encoding = guess_type(filename)[1]
    save_content = self.__class__.MAX_SIZE > 0
    self.filename = os.path.abspath(filename)
    st = os.stat(filename)
    self.mtime = int(st.st_mtime)
    self.size = st.st_size
    if save_content:
        if st.st_size > self.__class__.MAX_SIZE:
            raise Exception("file: '{0}' is too large".format(filename))
        self._content = ''
    # Read in filesystem-block-sized chunks.
    chunk_size = os.statvfs(filename).f_frsize
    with self._open(filename, binary, encoding) as fp:
        md5 = hashlib.md5()
        # NOTE(review): the b'' sentinel assumes fp.read returns bytes; for a
        # text-mode handle the sentinel would never match ('' != b'') and
        # `self._content += chunk` would mix str and bytes — confirm _open
        # always yields a byte stream (or handles both cases upstream).
        for chunk in iter(partial(fp.read, chunk_size), b''):
            md5.update(chunk)
            if save_content:
                self._content += chunk
    self.md5sum = md5.hexdigest()
def is_binary_comparison(self):
    """Truthy when either side of the comparison is a binary file.

    Preserves short-circuit truthiness: a falsy left/right file attribute
    passes through unchanged rather than being coerced to bool.
    """
    left_is_binary = self.left_file and is_binary(self.left_file)
    right_is_binary = self.right_file and is_binary(self.right_file)
    return left_is_binary or right_is_binary
def test_text_js(self):
    # A JavaScript source file must be classified as text.
    js_result = is_binary('tests/isBinaryFile/index.js')
    self.assertFalse(js_result)
def test_binary_gif2(self):
    # The null .gif fixture (presumably empty/all-null — hence no binary
    # markers) must be classified as not binary.
    gif_result = is_binary('tests/isBinaryFile/null_file.gif')
    self.assertFalse(gif_result)
def test_binary_gif3(self):
    # A real GIF image must be classified as binary.
    gif_result = is_binary('tests/isBinaryFile/trunks.gif')
    self.assertTrue(gif_result)
def might_be_binary(self):
    """Try to quickly guess if the file is binary."""
    # Import kept local, as in the original, so binaryornot is only a
    # dependency when this heuristic is actually used.
    from binaryornot.check import is_binary
    guess = is_binary(self.path)
    return guess
def is_binary(self):
    """Deprecated — always raises DeprecationWarning.

    The raise (rather than warnings.warn) is preserved so existing callers'
    behavior is unchanged. BUGFIX: the original had an unreachable
    `return is_binary(self.abspath)` after the raise; removed as dead code.
    """
    raise DeprecationWarning()
def is_text(self):
    """Is file content text ?"""
    binary = is_binary(self.abspath)
    return not binary
def run(self):
    # Download a file from WebDav into a per-request temp dir, refuse binary
    # content, detect its text encoding with chardet (plus heuristics below),
    # and report {item, content, encoding} via on_success / errors via
    # on_error.
    try:
        self.preload()
        self.logger.debug("FM WebDav ReadFile worker run(), path = %s" % self.path)
        webdav_path = self.webdav.path(self.path)
        hash_str = self.random_hash()
        download_path = TMP_DIR + '/' + self.login + '/' + hash_str + '/'
        download_result = self.download_file_from_webdav(webdav_path, download_path)

        if download_result["success"]:
            filedir = self.webdav.parent(self.path)
            filename = self.path
            if filedir != '/':
                filename = filename.replace(filedir, "", 1)
            read_path = (download_path + '/' + filename)
            if not os.path.exists(read_path):
                raise OSError("File not downloaded")

            if is_binary(read_path):
                raise OSError("File has binary content")

            with open(read_path, 'rb') as fd:
                content = fd.read()

            # part of file content for charset detection
            # NOTE(review): for files shorter than the buffer the head and
            # tail slices overlap, duplicating bytes — harmless for
            # detection, but confirm.
            part_content = content[0:self.charset_detect_buffer] + content[-self.charset_detect_buffer:]
            chardet_result = chardet.detect(part_content)
            detected = chardet_result["encoding"]
            confidence = chardet_result["confidence"]

            self.logger.debug("Detected encoding = %s (%s), %s" % (detected, confidence, read_path))

            # Workaround until we can build a proper libmagic >= 5.10
            # https://github.com/ahupp/python-magic/issues/47
            #
            # Mozilla's uchardet could also be built instead — still being
            # evaluated (it has its own quirks); note we already use its
            # Python port, chardet, and its site is already dead :(
            re_utf8 = re.compile('.*charset\s*=\s*utf\-8.*', re.UNICODE | re.IGNORECASE | re.MULTILINE)
            html_ext = ['htm', 'html', 'phtml', 'php', 'inc', 'tpl', 'xml']
            file_ext = os.path.splitext(read_path)[1][1:].strip().lower()

            # High-confidence corrections for encodings chardet commonly
            # mislabels on Cyrillic text.
            if confidence > 0.75 and detected != 'windows-1251' and detected != FM.DEFAULT_ENCODING:
                if detected == "ISO-8859-7":
                    detected = "windows-1251"
                if detected == "ISO-8859-2":
                    detected = "utf-8"
                if detected == "ascii":
                    detected = "utf-8"
                if detected == "MacCyrillic":
                    detected = "windows-1251"

                # If detection still got it wrong — fall back to the charset
                # declared inside the (HTML-like) file itself.
                if detected != FM.DEFAULT_ENCODING and file_ext in html_ext:
                    result_of_search = re_utf8.search(part_content)
                    self.logger.debug(result_of_search)
                    if result_of_search is not None:
                        self.logger.debug("matched utf-8 charset")
                        detected = FM.DEFAULT_ENCODING
                    else:
                        self.logger.debug("not matched utf-8 charset")

            # Medium-confidence corrections.
            elif confidence > 0.60 and detected != 'windows-1251' and detected != FM.DEFAULT_ENCODING:
                if detected == "ISO-8859-2":
                    detected = "windows-1251"
                if detected == "MacCyrillic":
                    detected = "windows-1251"

                # If detection still got it wrong — fall back to the charset
                # declared inside the (HTML-like) file itself.
                if detected != FM.DEFAULT_ENCODING and file_ext in html_ext:
                    result_of_search = re_utf8.search(part_content)
                    self.logger.debug(result_of_search)
                    if result_of_search is not None:
                        self.logger.debug("matched utf-8 charset")
                        detected = FM.DEFAULT_ENCODING
                    else:
                        self.logger.debug("not matched utf-8 charset")
            elif detected == 'windows-1251' or detected == FM.DEFAULT_ENCODING:
                pass
            else:
                detected = FM.DEFAULT_ENCODING

            # Only encodings known to FM are accepted; anything else falls
            # back to the default.
            encoding = detected if (detected or "").lower() in FM.encodings else FM.DEFAULT_ENCODING
            self.logger.debug("Result encoding = %s, %s" % (encoding, read_path))

            answer = {
                "item": self._make_file_info(read_path),
                "content": content,
                "encoding": encoding
            }

            result = {
                "data": answer,
                "error": False,
                "message": None,
                "traceback": None
            }

            self.on_success(result)
    except Exception as e:
        result = {
            "error": True,
            "message": str(e),
            "traceback": traceback.format_exc()
        }

        self.on_error(result)
def main():
    # Python 2 script: for every (wrong -> correct) word pair in
    # words/words.txt, scrape GitHub code search for repos containing the
    # typo, fork each repo, replace the typo in its text files, push, and
    # open a pull request.
    words_file = open('words/words.txt', 'r')
    words = words_file.read().split('\n')
    words_file.close()
    word_results = []
    # Lines look like "correct - wrong1, wrong2"; build (wrong, correct)
    # pairs.
    for line in words:
        try:
            if line == "":
                continue
            split_line = line.split(' - ', 1)
            correct = split_line[0].strip();
            wrong_words = split_line[1].split(',')
            for wrong in wrong_words:
                word_results.append((wrong.strip(), correct))
        except:
            print "Failed to parse line: %s" % line
            pass
    for wrong_word, correct_word in word_results:
        print "Scraping code search for word: %s" % wrong_word
        scraped_info = scrape_code_search(wrong_word)
        scraped_info_text = BeautifulSoup(scraped_info.text, "lxml")
        scraped_links = list(set(scraped_info_text.find_all('a')))
        repos_file = open('repos/repos.txt', 'w')
        # Links whose href and text both contain '/' are assumed to be
        # "owner/repo" references.
        for scraped_link in scraped_links:
            if "/" in scraped_link['href']:
                if "/" in scraped_link.get_text():
                    repos_file.write("%s\n" % scraped_link.get_text())
                    print "Added %s to file" % scraped_link.get_text()
        # Deduplicate via the shell, then rewrite the file in place.
        sorted_repos_file = os.popen("sort repos/repos.txt | uniq").read()
        repos_file.truncate()
        repos_file.write(sorted_repos_file)
        repos_file.close()
        print "Successfully scraped code search for word: %s" % wrong_word
        with open('repos/repos.txt', 'r') as repos_file_read:
            repos = repos_file_read.read().split('\n')
        for repo in repos:
            project_forked = False
            body = """
Hi! I'm a bot that checks GitHub for spelling mistakes, and I found one in your repository. When it should be '%s', you typed '%s'. I created this pull request to fix it!

If you think there is anything wrong with this pull request or just have a question, be kind to mail me at [email protected] (professional email, huh?). I’ll try to address the problem as soon as I’m aware of it. If you decide to close this pull request, please specify why before doing so.

With kind regards,

TheTypoMaster
""" % (correct_word, wrong_word)
            repo_name = repo.split('/', 1)[1]
            credentials_file = open('credentials.txt', 'r')
            username, password = credentials_file.read().split('\n')
            credentials_file.close()
            create_fork(repo, username, password)
            print "Created fork: %s" % repo
            # Poll until the fork has finished copying the repository.
            while True:
                forked_project = requests.get("https://github.com/TheTypoMaster/%s" % repo_name)
                if 'This repository is empty' in forked_project.text:
                    print "Large project; sleeping a little bit!"
                    time.sleep(1)
                else:
                    break;
            os.system("git clone https://github.com/TheTypoMaster/%s.git" % repo_name)
            print "Successfully cloned directory: %s" % repo_name
            # Replace the typo in every non-binary file of the clone.
            for dirpath, dirnames, filenames in os.walk(repo_name):
                for name in filenames:
                    path = os.path.join(dirpath, name)
                    mimetype_name = mimetypes.guess_type(path)[0]
                    try:
                        if is_binary(path):
                            print "File '%s' ignored, not a text file (%s)" % (path, mimetype_name)
                        else:
                            with open(path, 'r+w') as filepath:
                                filecontent = filepath.read()
                                if wrong_word in filecontent:
                                    print "Found '%s' in path: %s" % (wrong_word, path)
                                    filecontent = filecontent.replace(wrong_word, correct_word)
                                    filepath.truncate(0)
                                    filepath.seek(0)
                                    filepath.write(filecontent)
                                else:
                                    print "Could not find '%s' in path: %s" % (wrong_word, path)
                    except IOError:
                        print "Could not find file: '%s'" % path
                # Don't descend into .git.
                if '.git' in dirnames:
                    dirnames.remove('.git')
            os.chdir(repo_name)
            os.system("git add .")
            os.system("git commit -m \"Fix typo '%s' \"" % wrong_word)
            os.system("git push -u https://github.com/TheTypoMaster/%s.git master" % repo_name)
            os.chdir('../')
            print "Pushed changes"
            create_pull_request(repo, "Fix typo '%s'" % wrong_word, body, username, password)
            print "Created pull request for project '%s'" % repo_name
            cleanup(repo_name)
            print "Deleted project and emptied trash"
            # Drop the processed repo (first line) from the work list.
            with open('repos/repos.txt', 'r') as fin:
                data = fin.read().splitlines(True)
            with open('repos/repos.txt', 'w') as fout:
                fout.writelines(data[1:])
            print "Removed first line from file"
    print "Finished!"
    time.sleep(1)