def queryAlexa(self, url):
    php_url = "http://localhost/ad_detect/get_url_category.php"
    php_url += '?site=' + urllib2.quote(url)
    try:
        response = urllib2.urlopen(php_url)
        html = response.read()
        ret = json.loads(html)['Response']['UrlInfoResult']['Alexa']['Related']['Categories']['CategoryData']
        # Check whether it's a single Top/World category
        if type(ret) == dict:
            path = ret['AbsolutePath']
            refined_path = '/'.join(path.split('/')[:3])
            if 'World' in refined_path or 'Region' in refined_path:
                return {}
        if type(ret) == list:
            empty = True
            for i in range(len(ret)):
                path = ret[i]['AbsolutePath']
                refined_path = '/'.join(path.split('/')[:3])
                # Ignore region-based categories
                if 'World' not in refined_path and 'Region' not in refined_path:
                    empty = False
            if empty:
                return {}
        return {'source': 'Alexa', 'category': ret}
    except:
        return {}

def fix_duplicates(duplicate_chunks):
    for path in duplicate_chunks:
        # deconstruct relevant information from the chunk path, clean it
        path_components = path.split("/")
        if len(path_components) == 5:
            _, study_obj_id, username, data_stream, timestamp = path.split("/")
        elif len(path_components) == 4:
            study_obj_id, username, data_stream, timestamp = path.split("/")
        else:
            print(
                "You appear to have an invalid file path. Please report this error to "
                "https://github.com/onnela-lab/beiwe-backend/issues"
            )
            raise Exception("invalid_path: %s" % path)

        # not all files are chunkable; those require different logic
        if data_stream not in CHUNKABLE_FILES:
            remove_all_but_one_chunk(path)
            continue
        try:
            FileToProcess.reprocess_originals_from_chunk_path(path)
        except Exception as e:
            if "did not find any matching files" in str(e):
                pass
            else:
                raise
        remove_all_but_one_chunk(path)

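# Hedged note on the paths fix_duplicates expects (the prefix below is
# illustrative, inferred from the tuple unpacking above, not taken from the
# code itself): a 5-component chunk path looks like
#   <prefix>/<study_obj_id>/<username>/<data_stream>/<timestamp>
# and a 4-component path is the same string without the leading prefix.
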
def getPageCategory(self, url):
    result = self.getPageRawCategory(url)
    if result == {}:
        return {}
    ret = {'source': '', 'category': []}
    # Two levels for Alexa (e.g. Top/Shopping/Music)
    if result['source'] == 'Alexa':
        ret['source'] = 'Alexa'
        try:
            if type(result['category']) == list:
                for i in range(len(result['category'])):
                    path = result['category'][i]['AbsolutePath']
                    refined_path = '/'.join(path.split('/')[:3])
                    # Ignore region-based categories
                    if 'World' in refined_path or 'Region' in refined_path:
                        continue
                    if refined_path not in ret['category']:
                        ret['category'].append(refined_path)
            elif type(result['category']) == dict:
                path = result['category']['AbsolutePath']
                refined_path = '/'.join(path.split('/')[:3])
                ret['category'].append(refined_path)
        except:
            self.stats.increment('Category detection failed', 1)
            print 'ERROR PARSING:', result
    # Ignore all scores for Yahoo
    elif result['source'] == 'Yahoo':
        ret['source'] = 'Yahoo'
        try:
            for i in range(len(result['category'])):
                cat = result['category'][i]['category']
                if cat not in ret['category']:
                    ret['category'].append(cat)
        except:
            self.stats.increment('Category detection failed', 1)
            print 'ERROR PARSING:', result
    # Alchemy
    elif result['source'] == 'Alchemy':
        ret['source'] = 'Alchemy'
        ret['category'] = [result['category']]
    # Bluecoat
    elif result['source'] == 'Bluecoat':
        ret['source'] = 'Bluecoat'
        try:
            for i in range(len(result['category'])):
                cat = result['category'][i]
                if cat not in ret['category']:
                    ret['category'].append(cat)
        except:
            self.stats.increment('Category detection failed', 1)
            print 'ERROR PARSING:', result
    else:
        ret = {}
    if ret != {} and ret['source'] != '':
        self.stats.increment('Category detection succeeded', 1)
        ret['mapped_category'] = self.mapCategory(ret['source'], ret['category'])
    else:
        self.stats.increment('Category detection failed', 1)
    return ret

def translate_path(self, path):
    """Override to handle redirects."""
    path = path.split('?', 1)[0]
    path = path.split('#', 1)[0]
    path = normpath(unquote(path))
    words = filter(lambda a: a != '' and a not in (os.curdir, os.pardir),
                   path.split('/'))
    return os.path.join(self.serve_path, *words)

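# Hedged usage sketch for translate_path (the serve_path value is
# hypothetical): with self.serve_path = '/srv/www',
#   translate_path(self, '/docs/../img/logo.png?size=2#top')
# drops the query string and fragment, normalises the path, filters out
# '', '.' and '..' components, and returns '/srv/www/img/logo.png'.
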
def which(program, use_secure_path=False, options=None):
    """Searches the environment PATH (or a hard-coded 'secure' path) for
    an executable with the given name."""

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, name = os.path.split(program)
    if fpath:
        if is_exe(program):
            if options:
                program += " " + options
            return program
    else:
        if use_secure_path:
            path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
        else:
            path = os.environ["PATH"]
        for part in path.split(os.pathsep):
            part = part.strip('"')
            exe_file = os.path.join(part, program)
            if is_exe(exe_file):
                if options:
                    exe_file += " " + options
                return exe_file
    return None

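# Hedged usage sketch for which() (results vary by system; the values shown
# are illustrative):
#   which("ls")                        # e.g. "/bin/ls"
#   which("ls", use_secure_path=True)  # search only the hard-coded PATH
#   which("ls", options="-la")         # e.g. "/bin/ls -la", ready for a shell
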
def stidy(structure, ang, d1, d2, d3):
    PLATON = find_executable('platon')
    if not PLATON:
        PLATON = '../bin/platon'
    with NamedTemporaryFile(suffix='.cif') as temp_file:
        # write temporary cif file
        CifWriter(structure).write_file(temp_file.name)
        temp_file.flush()
        # run ADDSYM_SHX to make PLATON recognize symmetries
        addsym_shx_process = Popen([PLATON, '-o', temp_file.name],
                                   stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        try:
            addsym_shx_process.communicate(
                input='ADDSYM_SHX {} {} {} {}'.format(ang, d1, d2, d3).encode())
        except TimeoutExpired as t:
            return ExitCode(408, 'ADDSYM_SHX timed out: {}'.format(t))
        except Exception as e:
            return ExitCode(500, 'ADDSYM_SHX crashed: {}'.format(e))
        # call STIDY on the ADDSYM_SHX output
        temp_file_dirname, temp_file_basename = path.split(temp_file.name)
        temp_file_basename_extless, _ = path.splitext(temp_file_basename)
        temp_file_basename_spf = temp_file_basename_extless + '_pl.spf'
        temp_file_spf = path.join(temp_file_dirname, temp_file_basename_spf)
        if not path.isfile(temp_file_spf):
            return ExitCode(500, 'ADDSYM_SHX failed to write *_pl.spf file')
        stidy_process = Popen([PLATON, '-o', temp_file_spf],
                              stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        try:
            stidy_data = stidy_process.communicate(input=b'STIDY')
        except TimeoutExpired as t:
            return ExitCode(408, 'STIDY timed out: {}'.format(t))
        except Exception as e:
            return ExitCode(500, 'STIDY crashed: {}'.format(e))
        stidy_output = stidy_data[0].decode('utf-8')
    # clean up files
    if path.isfile('check.def'):
        remove('check.def')
    return stidy_output

def process_inputs():
    dir = 'C:/Workspace/Bills/input'
    ext = '*-raw.txt'
    inputs = list()
    dirs = ['train', 'test']
    for d1 in dirs:
        files = glob(join(dir, d1, ext))
        for f in files:
            d = set([])
            if path.isfile(f):
                txtfile = open(f).readlines()
                raw = list()
                for line in txtfile:
                    emails = regex_email.findall(line)
                    if len(emails):
                        for email in emails:
                            raw.append(email)
                    else:
                        flag, txt = process_txt(line)
                        if flag:
                            if len(txt) > 2:
                                raw.append(txt)
                if len(raw) > 0:
                    for sentence in raw:
                        emails = regex_email.findall(sentence)
                        if len(emails):
                            words = emails
                        else:
                            words = nltk.word_tokenize(sentence)
                        d = d | set(words)
                sd = set(sorted(d))
                vocab = set([])
                porter = nltk.PorterStemmer()
                for word in sd:
                    if not is_key_word(word):
                        stemmed_word = porter.stem(word)
                    else:
                        stemmed_word = word
                    vocab.add(stemmed_word)
                head, tail = path.split(f)
                find_idx = tail.rfind('.txt')
                if find_idx != -1:
                    s = tail.replace('-raw.txt', '-input.txt')
                    txtfilepath = path.join(head, s)
                    txtf = open(txtfilepath, 'w+')
                    for item in vocab:
                        txtf.write(item)
                        txtf.write('\n')
                    txtf.close()

def get_CParent_to_root_path_node_names(parse_dict, docID, sentID, conn_indices):
    parse_tree = parse_dict[docID]["sentences"][sentID]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:
        path = "NONE_TREE"
    else:
        path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up
            path += syntax_tree.get_node_path_to_root(conn_parent_node) + "-->"
        if path[-3:] == "-->":
            path = path[:-3]
    return path.split("-->")

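# Hedged note: get_node_path_to_root is assumed to return a string such as
# "VP-->S-->ROOT", so after joining the per-connective paths the final
# split("-->") yields a flat list of node names, e.g. ['VP', 'S', 'ROOT'].
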
def get_last_checkpoint(folder, modelasr_name):
    content = os.listdir(folder)
    checkpoints = []
    for path in content:
        if str(path.split('.')[0]).startswith(modelasr_name):
            checkpoints.append(path)
    if len(checkpoints) == 0:
        return None
    _re_checkpoint = re.compile(r'(\d+)')
    max_checkpoint = max(
        checkpoints,
        key=lambda x: int(_re_checkpoint.search(x).groups()[0]))
    step_num = int(_re_checkpoint.search(max_checkpoint).groups()[0])
    return os.path.join(folder, max_checkpoint), step_num

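# Hedged usage sketch (file names are hypothetical): for a folder holding
# "asr-100.pt" and "asr-250.pt" with modelasr_name="asr", the regex grabs
# the first digit group of each name, so the call returns
# ("<folder>/asr-250.pt", 250).
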
def response_path(path):
    """
    This method should return appropriate content and a mime type.

    TODO: get mime types aligned
          make PNG work
    """
    content = ''
    try:
        if path == '/':
            content = "{0}".format('\n'.join(
                [x for x in os.listdir('.\\webroot')]))
        else:
            file = open(Path.cwd().joinpath('webroot', path[1:]))
            content = file.read()
            file.close()
        # It seems weird to have an error raise an error, but this follows
        # the TODO from line 116 of the instructions: 'If response_path raised
        # a NameError, then let response be a not_found response.'
        file_ext = path.split(".")
        file_type = ''.join(file_ext[-1:])
        if file_type == 'html':
            mime_type = b"text/html"
        elif file_type == 'h':
            mime_type = b"text/html"
        elif file_type == 'txt':
            mime_type = b"text/plain"
        elif file_type == 'png':
            mime_type = b"image/png"
        elif file_type == "jpeg":
            mime_type = b"image/jpeg"
        else:
            mime_type = b"text/plain"
    except FileNotFoundError:
        raise NameError
    content = content.encode()
    return content, mime_type

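# Hedged usage sketch (assumes a "webroot" directory beside the script):
#   content, mime_type = response_path('/index.html')   # (..., b"text/html")
#   response_path('/missing.txt')  # FileNotFoundError is re-raised as
#                                  # NameError, which the caller maps to 404
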
def handleRequest(self, path, arguments, **kwargs):
    fileargs = dict(case=path.split('/')[-1])
    for k in ['verb', 'resumptionToken', 'metadataPrefix', 'set', 'from']:
        fileargs[k] = arguments[k][0] if k in arguments else ''
    if fileargs['resumptionToken']:
        filename = '{case}-{verb}-{resumptionToken}.xml'.format(**fileargs)
    else:
        filename = '{case}-{verb}-{metadataPrefix}-{set}-{from}.xml'.format(**fileargs)
    filepath = join(testdataDir, filename)
    if isfile(filepath):
        yield 'HTTP/1.0 200 Ok\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n'
        with open(filepath) as fp:
            yield fp.read()
    else:
        yield 'HTTP/1.0 400 Bad Request\r\nContent-Type: text/plain; charset=utf-8\r\n\r\n'
        error = 'Error: file {} not found.'.format(repr(filepath))
        print(error)
        yield error

def process_vocab(self, vera_path):
    # Access all the *-vera.ai files under the given path and generate the superset - vocab
    vocab_path = join(vera_path, 'vocab.vera')
    vocab = set([])
    for f in glob(path.join(vera_path, '*-vera.ai')):
        # for each vera.ai file create a set of words
        lines = open(f).readlines()
        if len(lines) > 0:
            vocab = vocab | set(lines)
    if path.exists(vocab_path):
        # back up the existing vocab file as e.g. vocab-backup.vera
        head, tail = path.split(vocab_path)
        dot = tail.rfind('.')
        tail = tail[:dot] + '-backup' + tail[dot:]
        if path.exists(path.join(head, tail)):
            os.unlink(path.join(head, tail))
        os.rename(vocab_path, path.join(head, tail))
    hf = open(vocab_path, 'w')
    for item in vocab:
        hf.write(item)
    hf.close()

def process_vocab():
    dir = 'C:/Workspace/Bills/input'
    ext = '*-input.txt'
    d = set([])
    dirs = ['train', 'test']
    for d1 in dirs:
        vocab = set([])
        files = glob(join(dir, d1, ext))
        for f in files:
            d = set([])
            if path.isfile(f):
                txtfile = open(f).readlines()
                raw = list()
                for line in txtfile:
                    raw.append(line)
                if len(raw) > 0:
                    d = d | set(raw)
            vocab = vocab | d
        head, tail = path.split(f)
        txtfilepath = join(head, 'Respicio-pp.txt')
        txtf = open(txtfilepath, 'w+')
        for item in vocab:
            txtf.write(item)
        txtf.close()

def run():
    ext = '*.jpg'
    dir = 'C:/Workspace/Bills'
    ext_txt = 'txt'
    files = glob(join(dir, 'image', ext))
    txtdir = path.join(dir, 'input')
    for f in files:
        if path.isfile(f):
            img = Image.open(f)
            txt = pytesseract.image_to_string(img)
            head, tail = path.split(f)
            find_idx = tail.rfind('.jpg')
            new_tail = tail
            if find_idx != -1:
                new_tail = tail.replace('.jpg', '-raw.txt')
            txtfilepath = path.join(txtdir, new_tail)
            flag, raw = process_txt(txt)
            if flag:
                txtf = open(txtfilepath, 'w+')
                for i in raw:
                    txtf.write(i)
                    txtf.write('\n')
                txtf.close()
    err = 0

async def Handle_Pcap(path):
    files_SendRequest = []
    task_SendRequest = []
    # extract http, ftp
    query1 = "tcpflow -r " + active_File + " -o " + path + " -e http"
    subprocess.check_output(query1, shell=True)
    # extract SMB
    list_SMB, list_Task = Export_SMB2()
    if len(list_SMB) > 0:
        files_SendRequest.extend(list_SMB)
        task_SendRequest.extend(list_Task)
    entries = os.listdir(path)
    markFtp = []
    # remove unusable files; remember FTP transcripts by port number
    for name in entries:
        fullPath = os.path.join(path, name)
        mime_Type = mime.from_file(fullPath)
        check = Check_UnFile(mime_Type)
        if mime_Type == "text/plain":
            if name.find(portFtp) != -1:
                markFtp.append(name)
        elif mime_Type in deny_MimiType or check == 1:
            os.remove(fullPath)
    entries = os.listdir(path)
    # handle each remaining file
    ValidIpAddressRegex = (r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
                           r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
                           r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
                           r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)')
    for name in entries:
        no_FullPath = os.path.join(path, name)
        mime_Type = mime.from_file(no_FullPath)
        if mime_Type != 'text/plain':
            check = re.search(ValidIpAddressRegex, name)
            if check is not None:
                fullPath, task = Parse_FileName(name, path, markFtp)
                if fullPath is None:
                    continue
                if fullPath == '':
                    os.remove(no_FullPath)
                    continue
                if mime_Type in ('application/zip',
                                 'application/x-7z-compressed',
                                 'application/x-rar-compressed'):
                    try:
                        extract_Files, extract_Tasks = Extract_FileCompressed(fullPath, task)
                        files_SendRequest.extend(extract_Files)
                        task_SendRequest.extend(extract_Tasks)
                    except Exception:
                        print('could not extract compressed file: ' + fullPath)
                else:
                    file = Static_Analyst(fullPath, task)
                    if file != '':
                        task = task.obj_dict()
                        task_SendRequest.append(task)
                        files_SendRequest.append(file)
    history = path.split("/")[-1] + ".log"
    logging.basicConfig(level=logging.DEBUG,
                        filename=os.path.join('Log', history),
                        format='%(asctime)s %(levelname)s:%(message)s')
    logger = logging.getLogger(__name__)
    logger.info(" ------- \t File Send Dynamic \t ------ \n ")
    logger.info("Count: %d", len(files_SendRequest))
    print(" ------- \t File Send Dynamic \t ------ \n ")
    print("Count: %d" % len(files_SendRequest))
    for i in files_SendRequest:
        print(i)
        logger.info(i)
    logger.info(" ------- \t Finish \t ------ \n ")
    print(" ------- \t Finish \t ------ \n ")
    await Dynamic_Analyst(files_SendRequest, task_SendRequest, logger)

for i in range(ids.shape[1]):
    gcam.backward(ids[:, [i]])
    regions = gcam.generate(target_layer=layer)
    for j in range(len(images)):
        print(f"#{j}: {classes[ids[j, i]]} ({probs[j, i]:.5f})")
        # Grad-CAM
        raw_image = imread(paths[j])
        combined = combine_image_and_gcam(regions[j, 0], raw_image)
        processed_images[j].append(combined.astype(np.uint8))

for j, (image_list, path) in enumerate(zip(processed_images, paths)):
    plt.figure(figsize=(16, 4))
    for i, image in enumerate(image_list):
        plt.subplot(1, len(image_list), i + 1, xticks=[], yticks=[], frameon=False)
        c, p, t = classes[ids[j, i]], 100 * probs[j, i], bool(labels[j, i])
        plt.title(f"{c} {p:.0f}% ({t})", fontsize=10)
        plt.imshow(image)
    plt.tight_layout()
    filename = '-'.join(path.split('/')[-3:])
    filename = splitext(filename)[0] + '.png'
    plt.savefig(join(opt.output, filename))
    plt.clf()

import time
from mpi4py import MPI
import h5py
import logging

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()

ts = time.time()

with open("/home/hklee/work/envs/envs.dat", "r") as f:
    contents = f.readlines()
for path in contents:
    if "total_path" in path:
        total_path = path.split("=")[1].strip()
    elif "result" in path:
        result_path = path.split("=")[1].strip()
    elif "parameter" in path:
        para_path = path.split("=")[1].strip()
    elif "log" in path:
        log_path = path.split("=")[1].strip()

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logfile = log_path + '%d_log.dat' % rank
lf = logging.FileHandler(logfile, 'w')
form = logging.Formatter('%(asctime)s - %(message)s')
lf.setFormatter(form)
logger.addHandler(lf)

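# Hedged sketch of the envs.dat layout the loop above assumes (the paths are
# illustrative): one "key = value" pair per line, matched by substring, with
# the value taken from everything after the '=':
#
#   total_path = /home/hklee/work/total/
#   result     = /home/hklee/work/result/
#   parameter  = /home/hklee/work/parameter/
#   log        = /home/hklee/work/log/
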
from mpi4py import MPI
import warnings
import numpy
import tool_box

# to stack the shear catalogs of each exposure into a file
warnings.filterwarnings("error")

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()

with open("/home/hkli/work/envs/envs.dat", "r") as f:
    contents = f.readlines()
for path in contents:
    if "cfht_data_path" in path:
        data_path = path.split("=")[1].strip()
    elif "cfht_field_path" in path:
        field_path = path.split("=")[1].strip()

cfht_dict, fields = tool_box.field_dict(data_path + "nname.dat")
field_pool = tool_box.allot(fields, cpus)

for field in field_pool[rank]:
    expos = list(cfht_dict[field].keys())
    f_path = field_path + field + "/"
    for expo in expos:
        i = 0
        for chip in cfht_dict[field][expo]:
            dat_path = data_path + "%s/result/%s_shear.dat" % (field, chip)
            try:
                temp = numpy.loadtxt(dat_path, skiprows=1)
            except Exception:
                # warnings are promoted to errors above; skip unreadable catalogs
                continue

cut = argv[1]

g1num = cpus - 6
g2num = cpus
g1 = numpy.linspace(-0.004, 0.004, g1num)
g2 = numpy.linspace(-0.0055, 0.0055, g2num)
dg1 = g1[1] - g1[0]
dg2 = g2[1] - g2[0]

t1 = time.time()

with open("%s/work/envs/envs.dat" % my_home, "r") as f:
    contents = f.readlines()
for path in contents:
    if "cfht_data_path" in path:
        data_path = path.split("=")[1].strip()
    elif "cfht_res_path" in path:
        result_path = path.split("=")[1].strip()
    elif "cfht_pic_path" in path:
        pic_path = path.split("=")[1].strip()
    elif "cfht_cut_path" in path:
        cut_path = path.split("=")[1].strip()

fq = Fourier_Quad(48, 123)
g1_data_path = result_path + "g1_%d.npz" % rank
g2_data_path = result_path + "g2_%d.npz" % rank

# to find the binaries on each source chip. it will save the binary label
# for each source; '1' means binary.
# if the command input is 'find', it will find the binaries;
# if 'stack' is input, it will stack the existing binary label files.

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
cpus = comm.Get_size()

ts = time.time()

with open("%s/work/envs/envs.dat" % my_home, "r") as f:
    contents = f.readlines()
for path in contents:
    if "cfht_data_path" in path:
        data_path = path.split("=")[1].strip()
    elif "cfht_res_path" in path:
        result_path = path.split("=")[1].strip()
    elif "cfht_pic_path" in path:
        pic_path = path.split("=")[1].strip()
    elif "cfht_field_path" in path:
        field_path = path.split("=")[1].strip()

size = 48
fq = Fourier_Quad(size, 123)
nname_path = data_path + "nname.dat"
field_dict, fields = tool_box.field_dict(nname_path)
r_fields = tool_box.allot(fields, cpus)[rank]
# for the stacking process

import numpy
import time
import os
from sys import argv
import matplotlib.pyplot as plt
import tool_box
import copy
from Fourier_Quad import Fourier_Quad  # assumed local module providing Fourier_Quad

data_name, g1num, g2num, bin_num, thresh = argv[1], argv[2], argv[3], argv[4], float(argv[5])
g1num, g2num, bin_num = int(g1num), int(g2num), int(bin_num)

with open("/home/hkli/work/envs/envs.dat", "r") as f:
    contents = f.readlines()
for path in contents:
    if "cfht_data_path" in path:
        total_path = path.split("=")[1].strip()
    elif "cfht_res_path" in path:
        result_path = path.split("=")[1].strip()

field_path = result_path + "field/"
nname_path = total_path + "nname.dat"
all_fields = tool_box.field_dict(nname_path)[1]
filter_path = result_path + "field/filtered.dat"
filter_exist = os.path.exists(filter_path)
fq = Fourier_Quad(48, 123)
cache_path = result_path + data_name
print(cache_path)
arr = numpy.load(cache_path)["arr_1"]