def convert(self):
    if len(self.cargs) < 3:
        raise CommandError('Convert requires 2 arguments')
    xml_path = self.cargs[1]
    xslt_path = self.cargs[2]
    out_file = self.cargs[3] if len(self.cargs) > 3 else None
    xml_string = utils.readFile(xml_path)
    xml_string = re.sub(ur'\bxmlns=', ur'xmlns2=', xml_string)
    # TODO: remove this hack, only for odt conversion
    # position 33% is like 'super' style
    xml_string = re.sub(ur'"-33%', ur'"sub', xml_string)
    xml_string = re.sub(ur'"33%', ur'"super', xml_string)
    xslt_string = utils.readFile(xslt_path)
    # replacements driven by directives found in the XSLT comments
    xslt_string = self.parse_xslt_directives(xslt_string, xml_string)
    ret = str(dputils.get_xslt_transform(xml_string, xslt_string))
    # ('comments' was referenced here but never defined; only the result is written)
    if out_file:
        dputils.write_file(out_file, ret, encoding=None)
    else:
        print ret
    return ret
def val(self):
    if len(self.cargs) < 2:
        raise CommandError('Val requires 1 argument')
    xml_path = self.cargs[1]
    val_path = None
    if len(self.cargs) > 2:
        val_path = self.cargs[2]
    xml_string = utils.readFile(xml_path)
    import lxml.etree as ET
    try:
        dom = dputils.get_xml_from_unicode(xml_string)
        if val_path:
            # validate against the DTD and report each error
            dtd = ET.DTD(open(val_path, 'rb'))
            if not dtd.validate(dom):
                for error in dtd.error_log.filter_from_errors():
                    print error
    except ET.XMLSyntaxError as e:
        print u'XML Syntax Error %s' % e
def run(self, update, packager, distRelease):
    content = ""
    # one mail per update
    reminderStr = packager.id + "-" + distRelease.tag + "-" + update.packageName \
        + "-" + update.bundles[0].version
    reminders = []
    try:
        if not os.path.exists(config.REMINDER_FILE):
            utils.writeFile(config.REMINDER_FILE,
                            "# Remove this file if you will re-send mail alerts\n", "w")
        content = utils.readFile(config.REMINDER_FILE)
        reminders = content.splitlines()
    except IOError:
        pass
    for r in reminders:
        if r == reminderStr:
            return "skipped"
    content += reminderStr + "\n"
    utils.writeFile(config.REMINDER_FILE, content, 'w')
    toaddrs = packager.mail
    subject = "Updates are available for %s" % update.packageName
    # headers, a blank line, then the body
    msg = "To: " + toaddrs + "\n" \
        + "From: " + config.FROM_ADRS + "\n" \
        + "Subject: " + subject + "\n\n" \
        + "PACKAGE INFO:\n" \
        + str(update) \
        + "\nRELEASE INFO:\n" \
        + str(distRelease)
    server = smtplib.SMTP(config.SMTP_HOSTNAME)
    server.sendmail(config.FROM_ADRS, toaddrs, msg)
    server.quit()
    return "done"
def stats(self):
    if len(self.cargs) < 2:
        raise CommandError('Stats requires 1 argument')
    xml_path = self.cargs[1]
    xml_string = utils.readFile(xml_path)
    print 'Count - Tag'
    print
    elements = re.findall(ur'<(\w+)', xml_string)
    for el in set(elements):
        print '%8d %s' % (elements.count(el), el)
    print
    print 'Unique tag-attributes'
    print
    els = {}
    elements = re.findall(ur'<([^>]+)>', xml_string)
    for el in elements:
        el = el.strip()
        if el[0] not in ['/', '?', '!']:
            els[el] = 1
    for el in sorted(els):
        print el
def readResultFile(self):
    """Read the result file if it exists and store a sorted dictionary."""
    rejected = utils.readFile(os.path.join(self.logDir, 'rejected'))
    resultData = {}
    if os.path.exists(self.resultFilename):
        entries = utils.readResultFile(self.resultFilename)
        for key in ('WorkerID', 'HitID', 'AssignmentID', 'StartTime',
                    'StopTime', 'Feedback', 'Annotation', 'ImageID'):
            resultData.setdefault(key, [])
        for entry in entries:
            if entry[6] != "no annotation" and entry[2] not in rejected:
                resultData['WorkerID'].append(entry[0])
                resultData['HitID'].append(entry[1])
                resultData['AssignmentID'].append(entry[2])
                resultData['StartTime'].append(entry[3])
                resultData['StopTime'].append(entry[4])
                resultData['Feedback'].append(entry[5])
                imageID = ",".join(entry[6].split(",")[:2])
                x, y, name = imageID.split(",")[1].split("_", 2)
                annotation = [float(value) for value in entry[6].split(",")[2:]]
                # group the flat coordinate list into (x, y) pairs
                annotation = zip(*[iter(annotation)] * 2)
                annotation = [QPointF(point[0] + float(x), point[1] + float(y))
                              for point in annotation]
                resultData['Annotation'].append(QPolygonF(annotation))
                resultData['ImageID'].append(name)
    else:
        logging.error("No result file!")
    self.resultData = resultData
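# Demo of the grouping idiom used above: zip(*[iter(seq)] * n) shares one
# iterator across all n slots, so consecutive items form n-tuples.
# >>> coords = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
# >>> list(zip(*[iter(coords)] * 2))
# [(1.0, 2.0), (3.0, 4.0), (5.0, 6.0)]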
def processOutput(self, testFilePath):
    referenceOutput = xml_utils.sortAndCanonizeXML(
        utils.readFile(testFilePath).replace('\n', ''))
    outputCanonicalXML = xml_utils.sortAndCanonizeXML(self.lastSocketOutput)
    attributeValueToDelete = IGNORED_ATTRIBUTE
    cutReferenceOutput = referenceOutput
    cutOutputCanonicalXML = outputCanonicalXML
    # index of the ignored attribute value in the reference
    refIdx = cutReferenceOutput.find(attributeValueToDelete)
    while refIdx != -1:
        # remove the ignored value from the reference
        cutReferenceOutput = cutReferenceOutput.replace(attributeValueToDelete, '', 1)
        # find the end of the corresponding attribute assignment in the output
        outputIdx = cutOutputCanonicalXML.find('"', refIdx)
        # remove the value from the output
        cutOutputCanonicalXML = cutOutputCanonicalXML[:refIdx] + cutOutputCanonicalXML[outputIdx:]
        refIdx = cutReferenceOutput.find(attributeValueToDelete)
    if cutReferenceOutput == cutOutputCanonicalXML:
        return True
    else:
        print ""
        print "R:", referenceOutput
        print "o:", outputCanonicalXML
        return False
def readData(self):
    """readData()

    Reads the data from the data source.

    Preconditions: this process has a lock on the basefile.
    """
    log.trace('entry')
    self.__threadLock.acquire()
    try:
        self.__checkLock()
        if os.path.exists(self.__filename):
            rawData = utils.readFile(self.__filename)
            log.debug('Got %s lines of raw data.' % len(rawData))
            self.__parseData(rawData)
            log.debug('Parsed into %s lines of usable data.' % len(self.__data))
            self.__dataCached = 1
    finally:
        self.__threadLock.release()
    log.trace('exit')
def editFiles(mntDir, name, mac):
    """
    Edit files on image

    @type mntDir: str
    @param mntDir: mount dir of the VM disk
    @type name: str
    @param name: vm name
    @type mac: str
    @param mac: mac address
    @rtype: None
    @returns: Nothing
    """
    # change hostname
    print "Image Editing\n\n"
    print "Changing hostname"
    hostname = os.path.join(mntDir, "etc/hostname")
    utils.recordFile(name, hostname)
    # set network to DHCP
    print "Set network as DHCP"
    netconfig = os.path.join(mntDir, "etc/sysconfig/network-scripts/ifcfg-eth0")
    content = utils.readFile(netconfig)
    bootproto = re.search("BOOTPROTO=.*", content).group(0)
    if "dhcp" not in bootproto:
        utils.sedFile(bootproto, "BOOTPROTO=dhcp", netconfig)
    # print IP used
    print "IP: " + IP[mac]
    print "root password: 60fb619414782814d344e9a08d77101ce31057fec58f35643ad167cf67c5e128ea66c750d08282633bb432619c146e0543ae8d61b885902b9f61302b537aaf07"
    commom.printSSHPort(mntDir)
def editFiles(mntDir, name, mac):
    """
    Edit files on image

    @type mntDir: str
    @param mntDir: mount dir of the VM disk
    @type name: str
    @param name: vm name
    @type mac: str
    @param mac: mac address
    @rtype: None
    @returns: Nothing
    """
    # change hostname
    print "Image Editing\n\n"
    print "Changing hostname"
    hostname = os.path.join(mntDir, "etc/hostname")
    utils.recordFile(name, hostname)
    # set network to DHCP
    print "Set network as DHCP"
    netconfig = os.path.join(mntDir, "etc/sysconfig/network/ifcfg-eth0")
    content = utils.readFile(netconfig)
    bootproto = re.search("BOOTPROTO=.*", content).group(0)
    if "dhcp" not in bootproto:
        utils.sedFile(bootproto, "BOOTPROTO=dhcp", netconfig)
    # print IP used
    print "IP: " + IP[mac]
    print "root password: 39af21a2ac52f3d5d0df3400c78342b2af80010e0bf3eb5d9e4a9fb931f0ea13904019853568d66d8428265e43e1d237339fda6f9056cc8eca8a582734e3ee8c"
    commom.printSSHPort(mntDir)
def processSleep(self, testFilePath):
    sleepTime = utils.readFile(testFilePath)
    try:
        # the test file holds a duration in milliseconds
        time.sleep(float(sleepTime) / 1000.0)
    except Exception:
        return False
    return True
def transform(beforeTransformFilename, afterTransformFilename):
    phone_48_int_map = '../map/phone_48_int.map'
    phone_48_39_int_map = '../map/phone_48_39_int.map'
    f1 = open(phone_48_int_map, 'r')
    f2 = open(phone_48_39_int_map, 'r')
    f3 = open(afterTransformFilename, 'w')
    # map: integer id -> 48-phone symbol
    d_int_48_map = {}
    for i in f1:
        i = i.strip().split()
        d_int_48_map[i[1]] = i[0]
    name, label = readFile(beforeTransformFilename)
    newlabel = [''] * len(label)
    for i in xrange(len(name)):
        newlabel[i] = d_int_48_map[label[i].strip('\n')]
    # map: 48-phone symbol -> 39-phone symbol
    d_48_39_map = {}
    for i in f2:
        i = i.strip().split()
        d_48_39_map[i[0]] = i[1]
    for i in xrange(len(newlabel)):
        newlabel[i] = d_48_39_map[newlabel[i]]
    f3.write('Id,Prediction\n')
    for i in xrange(len(name)):
        f3.write(name[i] + ',' + newlabel[i] + '\n')
    f1.close()
    f2.close()
    f3.close()
def processInput(self, testFilePath):
    request = utils.readFile(testFilePath)
    self.socket.write(request)
    self.lastSocketOutput = self.socket.read()
    requestType = re.search('type="(.+?)"', request).group(1)  # parsed but currently unused
    return True
def convert(self):
    if len(self.cargs) < 3:
        raise CommandError('Convert requires 2 arguments')
    xml_path = self.cargs[1]
    xslt_path = self.cargs[2]
    xml_string = utils.readFile(xml_path)
    xml_string = re.sub(ur'\bxmlns=', ur'xmlns2=', xml_string)
    xslt_string = utils.readFile(xslt_path)
    # replacements driven by directives in the XSLT
    xslt_string = self.parse_xslt_directives(xslt_string, xml_string)
    ret = dputils.get_xslt_transform(xml_string, xslt_string)
    print str(ret)
    return ret
def stats(self):
    if len(self.cargs) < 2:
        raise CommandError('Stats requires 1 argument')
    xml_path = self.cargs[1]
    xml_string = utils.readFile(xml_path)
    from utils import get_stats_from_xml_string
    stats = get_stats_from_xml_string(xml_string)
    print repr(stats)
def processOutput(self, testFilePath):
    referenceOutput = xml_utils.sortAndCanonizeXML(
        utils.readFile(testFilePath).replace("\n", ""))
    outputCanonicalXML = xml_utils.sortAndCanonizeXML(self.lastSocketOutput)
    if referenceOutput == outputCanonicalXML:
        return True
    else:
        print ""
        print "R:", referenceOutput
        print "o:", outputCanonicalXML
        return False
def constructList(targetDirectory):
    labelList = []
    numOfList = 0
    nameList = []
    targetPath = os.path.join(os.getcwd(), targetDirectory)
    for filename in os.listdir(targetPath):
        numOfList += 1
        # read in the file
        dummy_name, dummy_label = readFile(os.path.join(targetPath, filename))
        labelList.append(dummy_label)
        nameList = dummy_name  # keeps the name list of the last file read
    return labelList, numOfList, nameList
def pay(self, feedback):
    """
    Approve (i.e. pay) all assignments that have been submitted, but not
    approved or rejected yet. If there is an outliers list, all assignments
    of the workers listed there are rejected (i.e. NOT paid).
    """
    paid_count = 0
    outlier_count = 0
    hit_ids = utils.readFile(self.hitslog_filename)
    rejected = utils.readFile(os.path.join(self.logDir, 'rejected'))
    approved = utils.readFile(os.path.join(self.logDir, 'approved'))
    self.parent().statusBar.children()[2].setRange(0, len(hit_ids))
    for i, hit_id in enumerate(hit_ids):
        for assignment in self.connection.get_assignments(hit_id=hit_id,
                                                          status="Submitted"):
            assignment_id = assignment.AssignmentId
            if assignment_id in approved:
                try:
                    self.connection.approve_assignment(assignment_id=assignment_id,
                                                       feedback=feedback)
                    logging.info("Paying assignment " + assignment_id)
                    paid_count += 1
                except:
                    logging.error("Problem with approving assignment, "
                                  "probably not enough credit")
            elif assignment_id in rejected:
                try:
                    feedback = ("Unfortunately we cannot accept your work "
                                "because you did not follow the instructions "
                                "or submitted careless work.")
                    self.connection.reject_assignment(assignment_id=assignment_id,
                                                      feedback=feedback)
                    logging.info("Rejecting assignment " + assignment_id)
                    outlier_count += 1
                except:
                    logging.error("Problem with rejecting assignment")
            else:
                logging.info("Unreviewed assignment: {0}".format(assignment_id))
        self.parent().progress.emit(i + 1)
def processInput(self, testFilePath):
    try:
        self.socket = utils.SecuredSocket(self.host, self.port)
    except:
        print utils.failure("FAIL: cannot connect to ada_server socket")
        raise
    request = utils.readFile(testFilePath)
    self.socket.write(request)
    self.lastSocketOutput = self.socket.read()
    return True
def seg_HMM(model, test_set):
    seg = []
    lines = readFile(test_set)
    for line in lines:
        line = line.strip()
        if len(line) > 0:
            # keep the fixed-width 19-character line prefix unsegmented
            linebegin = line[:19]
            line = line[19:]
            tmp_seg = linebegin + "/ " + "/ ".join(model.seg(line))
            tmp_seg += "/ "
            seg.append(tmp_seg)
        else:
            seg.append("")
    return seg
def getSpecfileContent(self, pkgName, tag):
    pwd = commands.getoutput("pwd")
    tmpDir = tempfile.mkdtemp("%s-%s_spec" % (config.APP_NAME, pkgName))
    os.chdir(tmpDir)
    specfile = os.path.join(pkgName, tag, pkgName + ".spec")
    cmd = "CVSROOT=:pserver:[email protected]:/cvs/pkgs cvs co %s" % specfile
    status = os.system(cmd)
    if status > 0:
        config.LOG.error("Error during '%s'" % cmd)
        os.chdir(pwd)
        return None
    content = utils.readFile(specfile)
    os.chdir(pwd)
    os.system("rm -rf " + tmpDir)
    return content
def harvest(self):
    """
    This downloads all assignments that have not been rejected or approved
    yet of all HITs with status "Reviewable" to <self.resultFilename>.
    For every assignment, the downloaded fields are: worker ID, hit ID,
    assignment ID, accept time, submit time, worker feedback (if any,
    otherwise "no feedback"), polygon annotation ("no annotation", if for
    some reason the annotation is not present).
    """
    self.parent().status.emit("Downloading results")
    log = codecs.open(self.resultFilename, "w", "utf-8")
    hit_ids = utils.readFile(self.hitslog_filename)
    complete = True
    self.parent().statusBar.children()[2].setRange(0, len(hit_ids))
    for i, hit_id in enumerate(hit_ids):
        hit = self.connection.get_hit(hit_id=hit_id)[0]
        if hit.HITStatus != "Reviewable":
            complete = False
            continue
        # default assignment status: submitted, including approved and rejected
        rs = self.connection.get_assignments(hit_id=hit_id, page_size=100)
        self.parent().progress.emit(i + 1)
        for n, assignment in enumerate(rs):
            self.parent().status.emit("{0}... #{1}".format(hit_id[:10], n + 1))
            workerId = assignment.WorkerId
            hitId = assignment.HITId
            assignmentId = assignment.AssignmentId
            acceptTime = assignment.AcceptTime
            submitTime = assignment.SubmitTime
            feedback = "no feedback"
            annotation = "no annotation"
            for answer in assignment.answers[0]:
                label = answer.fields[0][0]
                content = answer.fields[0][1]
                if label == "feedback" and content.strip() != "You can leave your feedback here":
                    feedback = content
                elif label == "segpoly":
                    if content != "":
                        annotation = content
            fields = [workerId, hitId, assignmentId, acceptTime, submitTime,
                      feedback, annotation]
            for field in fields:
                log.write(field)
                log.write("\n")
            log.write("\n")
    if not complete:
        self.parent().status.emit("Not all HITs could be downloaded")
    else:
        self.parent().status.emit("Done")
def CreatePlaylist(self, text):
    playlistFolder = utils.config["playlistFolder"]
    playlistFolder += "/" if playlistFolder[-1] != "/" else ""
    pathToNewPlaylist = playlistFolder + utils.nameToDirectoryName(text) + ".dat"
    utils.writeFile(pathToNewPlaylist, "")
    playlistRoot = utils.config["playlistRoot"]
    playlistRootContent = utils.readFile(playlistRoot, forceLastVersion=True)
    playlistRootContent += "\n" if len(playlistRootContent) > 0 and playlistRootContent[-1] != "\n" else ""
    playlistRootContent += text + "\\" + pathToNewPlaylist
    utils.writeFile(playlistRoot, playlistRootContent)
def scan():
    step = 1.5
    scale = 1.25
    width = 24
    height = 24
    # readFile is assumed to deserialize the trained cascade object here
    cascade = utils.readFile('../result/20170616015311_/cascade.txt')
    faces = []
    imagePath = '../../BaoDataBase/myDataBase/1.jpg'
    iimage = IntegralImage(imagePath, 1)
    iWidth = iimage.original.shape[1]
    iHeight = iimage.original.shape[0]
    curScale = 1.0
    detect_folder = '../result/' + strftime(
        "%Y%m%d-%H%M%S", time.localtime()).replace('-', '') + '/'
    os.makedirs(os.path.dirname(detect_folder), exist_ok=True)
    debug('{0} : {1}\n'.format(iWidth, iHeight))
    count = 0
    # slide a window over the image, enlarging it by `scale` each pass
    while width < iWidth and height < iHeight:
        for x in range(0, np.int(iWidth - width), np.int(np.ceil(step * scale))):
            for y in range(0, np.int(iHeight - height), np.int(np.ceil(step * scale))):
                print('x : y = {0} : {1}'.format(x, y))
                # print('width : height = {0} : {1}'.format(width, height))
                pred = cascade.predict_with_scale(iimage, (x, y), curScale)
                # debugging leftover: save the window crop when pred == 1
                # if pred == 1:
                #     sub = iimage.image.crop((x, y, int(x + width), int(y + height)))
                #     sub.save(os.path.join(detect_folder, str(count) + '.jpg'))
                #     count += 1
        width *= scale
        height *= scale
        curScale *= scale
def processOutputSyntax(self, testFilePath):
    referenceOutputSyntax = xml_utils.getCanonicalXmlFromString(utils.readFile(testFilePath))
    # remove the leading XML declaration
    serverSyntax = re.sub(r'^\<\?xml.*?\?\>', r'', self.lastSocketOutput)
    # blank out all attribute values
    serverSyntax = re.sub(r'=".*?"', r'=""', serverSyntax)
    outputCanonicalXMLSyn = xml_utils.getCanonicalXmlFromString(serverSyntax)
    if referenceOutputSyntax == outputCanonicalXMLSyn:
        return True
    else:
        print referenceOutputSyntax
        print outputCanonicalXMLSyn
        return False
def main():
    fileName = sys.argv[1]
    boardingPasses = readFile(fileName)
    takenSeats = []
    for boarding in boardingPasses:
        # binary-partition the rows (characters 0-6: F = lower half, B = upper half)
        highLim = 127
        lowLim = 0
        row = -1
        for direction in boarding[0:6]:
            if direction == "F":
                highLim = highLim - int((highLim - lowLim + 1) / 2)
            elif direction == "B":
                lowLim = lowLim + int((highLim - lowLim + 1) / 2)
        if boarding[6] == "F":
            row = lowLim
        elif boarding[6] == "B":
            row = highLim
        # binary-partition the columns (characters 7-9: L = lower half, R = upper half)
        lowLim = 0
        highLim = 7
        col = -1
        for direction in boarding[7:9]:
            if direction == "L":
                highLim = highLim - int((highLim - lowLim + 1) / 2)
            elif direction == "R":
                lowLim = lowLim + int((highLim - lowLim + 1) / 2)
        if boarding[9] == "L":
            col = lowLim
        elif boarding[9] == "R":
            col = highLim
        takenSeats.append(dict([("row", row), ("col", col), ("id", row * 8 + col)]))
    # Part 1: highest seat ID
    ID = [seat["id"] for seat in takenSeats]
    print(max(ID))
    # Part 2: the missing seat ID is the gap in the sorted list
    ID.sort()
    for i in range(1, len(ID)):
        if ID[i] - ID[i - 1] > 1:
            print(ID[i - 1] + 1)
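# Equivalent decoding, shown for clarity: a boarding pass is a 10-bit binary
# number with B/R = 1 and F/L = 0, and row * 8 + col is exactly that number.
def seat_id(boarding):
    return int(boarding.translate(str.maketrans("FBLR", "0101")), 2)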
def build_bi_prefix_dict(dicpath):
    # dict mapping word1 -> {word2: frequency of the word pair}
    prefix_dict = {}
    lines = readFile(dicpath)
    for line in lines:
        if len(line) == 0:
            continue
        tmp = line.strip().split(" ")
        word1, word2, freq = tmp[0], tmp[1], int(tmp[2])
        if word1 not in prefix_dict:
            prefix_dict[word1] = {word2: freq}
        else:
            prefix_dict[word1][word2] = freq
    return prefix_dict
def html2xml(self):
    if len(self.cargs) < 2:
        raise CommandError('html2xml requires 1 argument')
    html_path = self.cargs[1]
    html_string = utils.readFile(html_path)
    import re
    # inline flags must come first in the pattern
    html_string = re.sub(ur'(?musi).*(<body.*/body>).*', ur'\1', html_string)
    # bs4 is the API that accepts a parser name as the second argument
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html_string, 'html.parser')
    ret = soup.prettify()
    return ret
def sendFile(self, filename, sendTo):
    print("start to send file: " + filename)
    start_time = time.time()
    # strip any directory prefix from the filename
    bfilename = filename.split('/')[-1].encode('utf8')
    # send metadata (the bare filename) first
    self.send(bfilename, sendTo)
    # then stream the file in fixed-size chunks
    for data in readFile(filename, chunk_size=DATA_LENGTH):
        self.send(data, sendTo)
    # an empty payload with flag 2 marks end of file
    self.send(bytes(), sendTo, 2)
    end_time = time.time()
    print("\nTook {:.2f}s to transfer.".format(end_time - start_time))
def main():
    inputFile = sys.argv[1]
    treeMap = readFile(inputFile)  # renamed from `map`, which shadows the builtin
    print("Input File: {0}".format(inputFile))
    encounteredTrees = []
    slopeAlternatives = [{"D": 1, "R": 1}, {"D": 1, "R": 3}, {"D": 1, "R": 5},
                         {"D": 1, "R": 7}, {"D": 2, "R": 1}]
    for slope in slopeAlternatives:
        treeCount = countTrees(treeMap, slope)
        print("For slope R{1} D{0} number of encountered trees are: {2}".format(
            slope["D"], slope["R"], treeCount))
        encounteredTrees.append(treeCount)
    treeProduct = 1
    for trees in encounteredTrees:
        treeProduct = treeProduct * trees
    print("Encountered trees multiplied together: {0}".format(treeProduct))
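# NOTE: `countTrees` is not defined in this excerpt. The sketch below is an
# assumption consistent with how it is called above (a grid of lines plus a
# slope dict with "D" down / "R" right steps), not the original implementation.
def countTrees(treeMap, slope):
    """Count '#' cells hit while stepping down/right through the grid,
    wrapping horizontally because the map repeats to the right."""
    trees = 0
    col = 0
    for row in range(0, len(treeMap), slope["D"]):
        line = treeMap[row]
        if line[col % len(line)] == "#":
            trees += 1
        col += slope["R"]
    return trees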
def main():
    lines = utils.readFile("test")
    result = []
    for i, line in enumerate(lines, start=1):
        c, flips = case(line)
        answer = str(flips) if c else "IMPOSSIBLE"
        output = "Case #" + str(i) + ": " + answer
        print(output)
        result.append(output)
    utils.saveToFile('result', result)
def main():
    fileName = sys.argv[1]
    content = readFile(fileName)
    passportInfoCollection = extractPassport(content)
    FIELDS = ("hcl", "iyr", "byr", "hgt", "eyr", "ecl", "pid")
    count = 0
    validField = 0
    for passportInfo in passportInfoCollection:
        if containsFields(passportInfo, FIELDS):
            count = count + 1
            if validateFields(passportInfo):
                validField = validField + 1
    print("Total number of passports: {0}".format(len(passportInfoCollection)))
    print("Number passports with all fields present: {0}".format(count))
    print("Number passports with all fields valid: {0}".format(validField))
def printSSHPort(mntDir):
    """
    Prints ssh port

    @type mntDir: str
    @param mntDir: dir where image is mounted
    @rtype: None
    @returns: Nothing
    """
    # display ssh info
    content = utils.readFile(os.path.join(mntDir, "etc/ssh/sshd_config"))
    reg = re.search(r"(\nPort) (?P<port>[0-9]*)", content)
    # no Port directive found: print the default
    if reg is None:
        print "SSH port is 22"
    else:
        print "SSH port is %(port)s" % reg.groupdict()
def challenge9(path):
    text = readFile(path, "rt")
    comment = re.findall("<!--(.*?)-->", text, flags=re.DOTALL)[0]
    firstStr = re.findall("first:\n(.*?)\n\n", comment, flags=re.DOTALL)[0]
    secondStr = re.findall("second:\n(.*?)\n\n", comment, flags=re.DOTALL)[0]
    first = list(map(int, firstStr.split(',')))
    second = list(map(int, secondStr.split(',')))
    print('first:', first)
    print('second:', second)
    # draw both coordinate lists as filled polygons
    im = Image.new('RGB', (500, 500))
    draw = ImageDraw.Draw(im)
    draw.polygon(first, fill='white')
    draw.polygon(second, fill='white')
    im.show()
    return None
def getTurked(self):
    """
    Parses the annotations out of <self.resultFilename> and writes them to
    <self.new_annotation>. The best annotation for every image is obtained
    in polygon_utils.merge_annotations; you can supply your own function if
    you want to change the default behaviour. If you have defined any
    outlier workers in <self.outliers>, their annotations will not be
    considered. If some annotations are empty (i.e. their entry in
    <self.resultFilename> is "no annotations"), their number will be printed.
    """
    # Read file
    entries = utils.readResultFile(self.resultFilename)
    rejected = utils.readFile(os.path.join(self.logDir, 'rejected'))
    # Init updatedict
    updatedict = {}  # {imageID: [[x1, y1, x2, y2], [x1, y1, x2, y2], ... ]}
    for filename in self.videoLabelHandler.files:
        updatedict.setdefault(filename, [])
    # Build updatedict
    no_annotation = 0
    for entry in entries:
        annotation = entry[6]
        assignmentId = entry[2]
        if assignmentId not in rejected:
            if annotation == "no annotation":
                no_annotation += 1
            else:
                image = annotation.split(",")[0]
                x, y, imageID = image.split("_")
                x, y = float(x), float(y)
                correspondence = annotation.split(",")[1:]
                if len(correspondence) > 1:
                    correspondence = [round(float(number), 3) for number in correspondence]
                    # Split the flat list into 4-tuples
                    correspondence = zip(*[iter(correspondence)] * 4)
                    correspondence = [(pts[0] + x, pts[1] + y, pts[2] + x, pts[3] + y)
                                      for pts in correspondence]
                    updatedict[imageID] += correspondence
    if no_annotation > 0:
        logging.info("{0} empty annotations!".format(no_annotation))
    # Write correspondence XML
    self.videoLabelHandler.writeCorrespondenceXML(self.correspondenceFile, updatedict)
def sim_phon_closest(l1):
    """
    Should return a tuple: langs, closest.

    :param l1:
    :returns: ?
    """
    # langs = phoible.loadlangs()
    # closest = phoible.getclosest(l1, langs)
    # return langs, closest
    uriel.u.loadfeatures()
    # hack! restrict to the languages listed in the wikinames file
    langlist = map(lambda p: p[0],
                   utils.readFile("/shared/experiments/mayhew2/transliteration/tl_sim/wikinames.txt"))
    uriel.u.loadinventorysets(langlist)
    closest = uriel.getclosest(l1)
    langs = uriel.u.featlangs
    return langs, closest
def getTurked(self):
    """
    Parses the annotations out of <self.resultFilename> and writes them to
    <self.newProjFile>. The best annotation for every image is obtained in
    polygon_utils.merge_annotations; you can supply your own function if you
    want to change the default behaviour. If you have defined any outlier
    workers in <self.outliers>, their annotations will not be considered.
    If some annotations are empty (i.e. their entry in <self.resultFilename>
    is "no annotations"), their number will be printed.
    """
    entries = utils.readResultFile(self.resultFilename)
    no_annotation = 0
    updatedict = {}  # imageID: [annotation_1, ..., annotation_n]
    rejected = utils.readFile(os.path.join(self.logDir, 'rejected'))
    for entry in entries:
        annotation = entry[6]
        assignmentID = entry[2]
        if assignmentID not in rejected:
            if annotation == "no annotation":
                no_annotation += 1
            else:
                imageID = ",".join(annotation.split(",")[:2])
                coordinate_list = [float(value) for value in annotation.split(",")[2:]]
                # group the flat coordinate list into (x, y) pairs
                point_list = zip(*[iter(coordinate_list)] * 2)
                polygon = QPolygonF([QPointF(x, y) for x, y in point_list])
                updatedict.setdefault(imageID, []).append(polygon)
    if no_annotation > 0:
        logging.info("{0} empty annotations!".format(no_annotation))
    # Merge best matching outlines and update the XML file
    from evaluation import PolygonList
    result_dict = {}
    for imageID in updatedict:
        pol_list = PolygonList(updatedict[imageID])
        result_dict[imageID] = pol_list.mergeBestMatchingPolygons()
    self.videoLabelHandler.update(result_dict)
    self.videoLabelHandler.write(self.newProjFile)
    self.parent().status.emit("Please open new project file")
def createLibvirtXML(name, mac, img, template):
    """
    Create, define and start a VM from an XML template

    @type name: str
    @param name: vm name
    @type mac: str
    @param mac: mac address of the new VM
    @type img: str
    @param img: img path
    @type template: str
    @param template: template name
    @rtype: None
    @returns: Nothing
    """
    # fill xml template
    template = os.path.join(os.getcwd(), "distros/", template)
    newXML = utils.readFile(template)
    newXML = newXML % {"name": name, "disk_path": img, "mac_address": mac}
    # write xml file
    tmpfile = tempfile.mkstemp()[1]
    utils.recordFile(newXML, tmpfile)
    # define vm
    print "Defining new VM %s" % name
    if 0 != os.system(DEFINE_VM % tmpfile):
        print "Cannot define %s" % tmpfile
        sys.exit(1)
    os.system("rm -f %s" % tmpfile)
    # start vm
    os.system(START_VM % name)
    os.system(AUTOSTART % name)
def status(self):
    """
    Prints worker IDs sorted by number of assignments they completed, as
    well as the number of completed assignments and the number of total
    assignments.
    """
    workerdict = {}
    hit_ids = utils.readFile(self.hitslog_filename)
    assignments = 0
    self.parent().statusBar.children()[2].setRange(0, len(hit_ids))
    for i, hit_id in enumerate(hit_ids):
        for n, assignment in enumerate(self.connection.get_assignments(hit_id=hit_id)):
            assignments += 1
            workerdict.setdefault(assignment.WorkerId, 0)
            workerdict[assignment.WorkerId] += 1
            self.parent().status.emit("{0}... # {1}".format(hit_id[:10], n + 1))
        self.parent().progress.emit(i + 1)
    # Sort according to number of solved hits
    items = sorted(workerdict.items(), key=lambda item: item[1])
    return items, assignments
def tokenise(source):
    program = readFile(source)
    token = ""
    tokens = []
    posInSrc = [0, 1]  # 0th char of 1st line
    for char in program:
        posInSrc[0] += 1
        if char == "\n":
            posInSrc[1] += 1  # increment line by one
            posInSrc[0] = 1
        if char.strip() == "" or char in tokenTypes:
            # whitespace or a single-char token terminates the current token
            tkType = identifyToken(token)
            if tkType is not False:
                tokens.append(Token(token, tkType, posInSrc.copy()))
            token = ""
            tkType = identifyToken(char.strip())
            if tkType is not False:
                tokens.append(Token(char, tkType, posInSrc.copy()))
        else:
            token += char
    return tokens
def main():
    lines = readFile(segpath)
    dic = buildDic(lines)
    dicsp = buildSpDic(lines)
    dicnum = buildNumDic(lines)
    dicen = buildEnDic(lines)
    dicpt = buildPtDic(lines)
    dicnr = buildNrDic(lines)
    dicns = buildNsDic(lines)
    dicnsnr = buildNsNrDic(lines)
    dicnsrum = buildNsrumDic(lines)
    writeDic(dicpath, dic)
    writeDic(sp_dicpath, dicsp)
    writeDic(num_dicpath, dicnum)
    writeDic(en_dicpath, dicen)
    writeDic(pt_dicpath, dicpt)
    writeDic(nr_dicpath, dicnr)
    writeDic(ns_dicpath, dicns)
    writeDic(nsnr_dicpath, dicnsnr)
    writeDic(nsrum_dicpath, dicnsrum)
def deleteHit(self):
    """
    Delete all HITs that belong to one project (either in sandbox or the
    actual MTurk). Assignments that were neither approved nor rejected are
    automatically approved (i.e. paid).
    """
    self.parent().status.emit("Deleting HITs")
    hit_ids = utils.readFile(self.hitslog_filename)
    self.parent().statusBar.children()[2].setRange(0, len(hit_ids))
    for i, hit_id in enumerate(hit_ids):
        detailed_hit = self.connection.get_hit(hit_id=hit_id)[0]
        if detailed_hit.HITStatus == "Reviewable":
            for assignment in self.connection.get_assignments(hit_id=hit_id):
                if assignment.AssignmentStatus == "Submitted":
                    self.connection.approve_assignment(assignment.AssignmentId)
            self.connection.dispose_hit(hit_id)
        else:
            self.connection.disable_hit(hit_id)  # in all other cases
        self.parent().progress.emit(i + 1)
    self.parent().status.emit("Done")
    return True
def start_requests(self):
    try:
        file = self.file
        mode = Mode.FILE
    except AttributeError:
        mode = Mode.KEYWORD
    header = random.choice(self.headers)
    if mode == Mode.KEYWORD:
        url = ("https://www.amazon.com/s/ref=nb_sb_noss_2"
               "?url=search-alias%3Daps&field-keywords=" + self.keyword)
        yield scrapy.Request(url=url, callback=self.parse,
                             headers={"user-agent": header},
                             cookies=self.cookies)
    else:
        links = readFile(file)
        notif(header)
        header = random.choice(self.headers)
        for link in links:
            if "http" in link:
                yield scrapy.Request(url=link, callback=self.parse_product,
                                     headers={"user-agent": header},
                                     cookies=self.cookies)
            else:
                notif("INVALID LINK: " + link)
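# NOTE: `Mode` is not defined in this excerpt; a minimal sketch of what the
# spider assumes (an assumption, not the project's actual definition):
from enum import Enum

class Mode(Enum):
    FILE = 1     # crawl product links listed in self.file
    KEYWORD = 2  # crawl search results for self.keyword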
def _prepare(self, update, packager, distRelease):
    # create tmp directory
    currentDir = commands.getoutput("pwd")
    tmpDir = tempfile.mkdtemp("%s-%s_srpm" % (config.APP_NAME, update.packageName))
    os.chdir(tmpDir)
    cvsDir = "%s/%s" % (update.packageName, distRelease.tag)
    # get sources
    status = os.system("CVSROOT=:pserver:[email protected]:/cvs/pkgs cvs co %s" % cvsDir)
    if status != 0:
        raise RuntimeError("cvs checkout failed")
    # os.chdir(cvsDir)
    status = os.system("make -C %s" % cvsDir)
    if status != 0:
        raise RuntimeError("make failed")
    major, minor, micro, qualifier = utils.parseBundleVersion(update.bundles[0].version)
    specWriter = WriteSpecfile()
    specWriter.run(update, packager, distRelease)
    specContent = utils.readFile(specWriter.outputFile)
    try:
        m = re.search(r"source[0|:].*[\ |\t]+(.*)", specContent, re.IGNORECASE)
        src_url = m.group(1)
        src_url = src_url.replace("%{major}", major)
        src_url = src_url.replace("%{minor}", minor)
        src_url = src_url.replace("%{micro}", micro)
        # fix eclipse-pydev define??
        src_url = src_url.replace("%{maint}", micro)
        status = os.system("wget %s" % src_url)
        if status != 0:
            raise RuntimeError("wget failed")
        status = os.system("make -C %s srpm" % cvsDir)
        if status != 0:
            raise RuntimeError("make srpm failed")
    except:
        # try to grab sources using fetch-* scripts??
        raise
    os.chdir(currentDir)
def LM_one_gram_OOV_seg(textpath, dic, total):
    textlines = readFile(textpath)
    textSize = len(textlines)
    seg = []
    startTime = time()
    print(datetime.datetime.now())
    for i in range(textSize):
        sen = textlines[i].strip()
        if len(sen) == 0:
            seg.append("")
            continue
        # keep the fixed-width 19-character line prefix unsegmented
        linebegin = sen[:19]
        sen = sen[19:]
        DAG = build_DAG(sen, dic)
        sen_seg = linebegin + "/ " + "/ ".join(seg_sen(sen, DAG, dic, total)) + "/ "
        seg.append(sen_seg)
    endTime = time()
    print(endTime - startTime)
    return seg
def build_prefix_dict(dicpath):
    prefix_dict = {}  # dict mapping word -> frequency
    total = 0  # total word count, including repeats (renamed from `sum`, which shadows the builtin)
    lines = readFile(dicpath)
    for line in lines:
        if len(line) == 0:
            continue
        # the dic format: word POS times
        tmp = line.strip().split(" ")
        word, freq = tmp[0], int(tmp[2])
        if word not in prefix_dict:
            prefix_dict[word] = freq
        else:
            prefix_dict[word] += freq
        total += freq
        # record every prefix of the word with frequency 0, so DAG
        # construction can stop scanning as soon as a prefix is missing
        for ch in range(len(word)):
            prefix = word[:ch + 1]
            if prefix not in prefix_dict:
                prefix_dict[prefix] = 0
    return prefix_dict, total
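# NOTE: `build_DAG`, used by the segmenters above, is not shown in this
# excerpt. Below is a sketch of the usual construction over this kind of
# prefix dict (entries with frequency 0 are pure prefixes, and a missing
# entry ends the scan); it is an assumption, not the original code.
def build_DAG(sen, dic):
    """For each start index k, list the end indices i such that
    sen[k:i+1] is a word in dic (frequency > 0)."""
    DAG = {}
    n = len(sen)
    for k in range(n):
        ends = []
        i = k
        frag = sen[k]
        while i < n and frag in dic:
            if dic[frag] > 0:
                ends.append(i)
            i += 1
            frag = sen[k:i + 1]
        if not ends:
            ends.append(k)  # fall back to the single character
        DAG[k] = ends
    return DAG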
def main():
    fileName = sys.argv[1]
    bootCode = readFile(fileName=fileName)
    # Part 1
    result = executeCode(bootCode)
    print("The accumulated value: {0}, Error: {1}".format(
        result["value"], result["error"]))
    # Part 2: flip one jmp/nop at a time until the program terminates cleanly
    for row in range(1, len(bootCode)):
        newCode = copy.deepcopy(bootCode)
        if newCode[row][0:3] != "acc":
            if newCode[row][0:3] == "jmp":
                newCode[row] = newCode[row].replace("jmp", "nop")
                result = executeCode(newCode)
            elif newCode[row][0:3] == "nop":
                newCode[row] = newCode[row].replace("nop", "jmp")
                result = executeCode(newCode)
            if result["error"] == 0:
                break
    print("The accumulated value: {0}, Error: {1}".format(
        result["value"], result["error"]))
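# NOTE: `executeCode` is not included in the excerpt. A minimal sketch
# matching the {"value", "error"} result shape used above (error 1 = loop
# detected, 0 = clean termination); an assumption, not the original code.
def executeCode(bootCode):
    """Run acc/jmp/nop instructions; stop on the first repeated
    instruction (error 1) or on falling off the end (error 0)."""
    acc = 0
    pc = 0
    seen = set()
    while pc < len(bootCode):
        if pc in seen:
            return {"value": acc, "error": 1}
        seen.add(pc)
        op, arg = bootCode[pc].split()
        if op == "acc":
            acc += int(arg)
            pc += 1
        elif op == "jmp":
            pc += int(arg)
        else:  # nop
            pc += 1
    return {"value": acc, "error": 0}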
def sendFile(self, filename, sendTo):
    start_time = time.time()
    try:
        self.localSocket.connect(sendTo)
    except:
        print("Cannot connect to {}.".format(sendTo), file=sys.stderr)
        sys.exit(-1)
    print("start to send file: " + filename)
    # strip any directory prefix from the filename
    bfilename = bytes(filename.split('/')[-1], 'utf8')
    # metadata frame: 8-byte big-endian length, the filename, zero padding
    l = len(bfilename).to_bytes(8, byteorder='big')
    padding = DATA_LENGTH - len(bfilename) - len(l)
    self.send(l + bfilename + bytes(padding))
    # stream the file in fixed-size chunks
    for data in readFile(filename, chunk_size=DATA_LENGTH):
        self.send(data)
    # an all-zero frame with flag 2 marks end of file
    self.send(bytes(DATA_LENGTH), flag=2)
    end_time = time.time()
    print("\nTook {:.2f}s to transfer.".format(end_time - start_time))
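# NOTE: the receiving side is not part of this excerpt. A minimal sketch of
# how the metadata frame above would be parsed, assuming the receiver gets
# the same DATA_LENGTH-sized frames (an assumption, not the original code):
def parse_metadata_frame(frame):
    """Recover the filename from the fixed-size metadata frame:
    8-byte big-endian length, then the UTF-8 name, then zero padding."""
    name_len = int.from_bytes(frame[:8], byteorder='big')
    return frame[8:8 + name_len].decode('utf8')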
def main():
    fileName = sys.argv[1]
    content = readFile(fileName)
    xmasCode = [int(x) for x in content]
    preLen = 5 if "example" in fileName else 25
    # Part 1: numbers that are not a sum of two of the previous preLen numbers
    wrongNb = []
    for row in range(preLen, len(xmasCode)):
        isValid, high, low = checkForSum(xmasCode[row], xmasCode[row - preLen:row])
        if not isValid:
            wrongNb.append(xmasCode[row])
    print(wrongNb)
    # Part 2: contiguous range summing to the first invalid number
    invalidNb = wrongNb[0]
    lowLim = 0
    highLim = 1
    while lowLim < len(xmasCode) and highLim <= len(xmasCode):
        currentSum = sum(xmasCode[lowLim:highLim])
        if invalidNb == currentSum:
            print("Set: ", xmasCode[lowLim:highLim])
            minValue = min(xmasCode[lowLim:highLim])
            maxValue = max(xmasCode[lowLim:highLim])
            print("{0} + {1} = {2}".format(minValue, maxValue, minValue + maxValue))
            break
        elif invalidNb > currentSum:
            highLim += 1
        elif invalidNb < currentSum:
            lowLim += 1
            highLim = lowLim + 1
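# NOTE: `checkForSum` is not shown. A minimal sketch matching the
# [isValid, high, low] return shape used above (the high/low values are
# unused by the caller); an assumption, not the original implementation.
def checkForSum(target, window):
    """Return [True, a, b] if two distinct numbers in window sum to target,
    else [False, None, None]."""
    seen = set()
    for nb in window:
        if target - nb in seen:
            return [True, max(nb, target - nb), min(nb, target - nb)]
        seen.add(nb)
    return [False, None, None]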
def LM_two_gram_seg(textpath, dic, bi_dic):
    textlines = readFile(textpath)
    textSize = len(textlines)
    seg = []
    startTime = time()
    for i in range(textSize):
        sen = textlines[i].strip()
        if len(sen) == 0:
            seg.append([])
            continue
        # keep the fixed-width 19-character line prefix unsegmented
        linebegin = sen[:19]
        sen = sen[19:]
        # wrap with sentence boundary markers for the bigram model
        sen = "<BOS>" + sen + "<EOS>"
        route = {}
        DAG = build_DAG(sen, dic)
        sen_seg = cal_route_two(sen, DAG, route, dic, bi_dic)
        sen_seg.insert(0, linebegin)
        seg.append(sen_seg)
    endTime = time()
    print(endTime - startTime)
    return seg
    df = df.drop(columns=[column])
    related_columns = [
        col for col in df.columns if col.startswith(column + '__')
    ]
    df = df[related_columns]
    df = df.drop_duplicates()
    assert len(df) <= 1
    if len(df) == 0:
        return [0 for col in df[column].unique().tolist()[:-1]]
    return df.values.tolist()[0]


if __name__ == "__main__":
    if util.final:
        columns, data = util.readFile('dating-full.csv', None)
    else:
        columns, data = util.readFile('test_dataset.csv')
    # Answer to question 1.i
    data = data[:6500]
    # Preprocess similar to assignment 2 1.i
    pr.stripQuotes(data, ['race', 'race_o', 'field'])
    # Preprocess similar to assignment 2 1.ii
    pr.toLowerCase(data, ['field'])
    # Preprocess similar to assignment 2 1.iv
    pr.normalizeColumns(data, util.psParticipants, util.psPartners)
# """
# # tkg = "[kg] "+",".join(data['kg'])+" [/kg] "+data['sentence']
# data['sentence_b'] = ",".join(data['kg'])
# p = Tclass.pre(data['sentence'], data['sentence_b'])
# softmax = Tclass.softmax()
# Tclass.release
# print("class", "|", "probability")
# pre = []
# for ck, rank in zip([1, 2], softmax):
#     print(ck, "|", rank)
#     pre.append([ck, round(rank, 4)])
#
# del Tclass
# gc.collect()
# return p+1, pre

path = "/mnt/data/dev/tdata/wiki_zh"
relations_all = []
for f in tfile.all_path(path):
    # para = readFile('./wiki_00')
    print(f)
    para = readFile(f)
    relations, dict_DSNF = getRelation(para)
    relations_all = relations_all + relations

print("Finished!")
print("Final result:")
for i, it in enumerate(relations_all):
    print(i, it)
# print(dict_DSNF)
from relation_extraction import getRelation
from utils import readFile

para = readFile('./wiki_00')
relations, dict_DSNF = getRelation(para)
print("Finished!")
print("Final result:")
print(relations)
print(dict_DSNF)
import pandas as pd
import numpy as np
import utils as util
import matplotlib.pyplot as plt

if util.final:
    columns, data = util.readFile('dating.csv')
else:
    columns, data = util.readFile('test_dating.csv')

for col in util.rPP:
    distinctValues = np.sort(data[col].unique())
    success_rates = {}
    for val in distinctValues:
        dfWithVal = data[data[col] == val]
        dfWithValSuccess = dfWithVal[dfWithVal.decision == 1]
        success_rates[val] = len(dfWithValSuccess) / len(dfWithVal)
    plt.scatter(distinctValues,
                [success_rates[val] for val in distinctValues],
                marker='o', s=50)
    plt.xlabel(col)
    plt.ylabel('Success Rate')
    if util.final:
        plt.savefig('outputs/2_2/success_rate_' + col + '.pdf', format='pdf')
    else:
        plt.savefig('outputs/2_2/test_success_rate_' + col + '.pdf', format='pdf')
    plt.clf()
from utils import getFilename, readFile
from collections import Counter, defaultdict

filename = 'day14.txt'
output = readFile(filename, str, sepChar='\n')

template = output[0]
rules = dict()
for i in range(2, len(output)):
    temp = output[i].split()
    rules[temp[0]] = temp[2]

### PART 1 ###
for step in range(10):
    new_template = ''
    for i in range(len(template) - 1):
        new_template += template[i]
        if template[i:i + 2] in rules:
            new_template += rules[template[i:i + 2]]
    new_template += template[-1]
    template = new_template

template_counts = Counter(template).most_common()
print(template_counts[0][1] - template_counts[-1][1])

### PART 2 ###
template = output[0]
template_dict = defaultdict(int)
for i in range(len(template) - 1):
    template_dict[template[i:i + 2]] += 1
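# NOTE: the excerpt ends just after building the pair counts. Below is a
# sketch (an assumption, not the original file's code) of the standard
# part-2 continuation: apply the rules to pair counts for 40 steps, then
# recover letter counts from the pair counts.
for step in range(40):
    new_dict = defaultdict(int)
    for pair, count in template_dict.items():
        if pair in rules:
            inserted = rules[pair]
            new_dict[pair[0] + inserted] += count
            new_dict[inserted + pair[1]] += count
        else:
            new_dict[pair] += count
    template_dict = new_dict

# every letter is the first char of exactly one pair, except the last
# letter of the template, which never changes
letter_counts = defaultdict(int)
for pair, count in template_dict.items():
    letter_counts[pair[0]] += count
letter_counts[template[-1]] += 1
counts = sorted(letter_counts.values())
print(counts[-1] - counts[0])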
# t, p_value = stats.ttest_rel(lr_results, svm_results)
# paired t statistic computed manually: mean difference over its standard error
differences = np.array(lr_results, dtype='float32') - np.array(svm_results, dtype='float32')
top = np.average(differences)
var = np.var(differences, ddof=1)
sed = np.sqrt(var / len(lr_results))
print(top / sed)


if __name__ == "__main__":
    if util.final:
        columns, data = util.readFile('trainingSet.csv')
    else:
        columns, data = util.readFile('test_trainingSet.csv')
    number_of_folds = 10
    # Answer to the question 3.i
    folds = generate_folds(data, number_of_folds)
    # Answer to the question 3.ii
    t_fracs = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2]
    fold_size = len(folds[0])
    train_sizes = [fold_size * (number_of_folds - 1) * frac for frac in t_fracs]
    model_idxs = [1, 2, 3]
from utils import readFile

lineArr = readFile('inputs/inputTask3.txt')


def task3a(lineArr):
    treeCount = 0
    elem = 0
    for line in lineArr:
        if line[elem] == "#":
            treeCount = treeCount + 1
        elem = (elem + 3) % 31
    return treeCount


def task3b(lineArr):
    vStepSize = [1, 1, 1, 1, 2]
    hStepSize = [1, 3, 5, 7, 1]
    treeCountProduct = 1
    for step in range(len(vStepSize)):
        treeCount = 0
        elem = 0
        for line in lineArr[::vStepSize[step]]:
            if line[elem] == "#":
                treeCount = treeCount + 1
            elem = (elem + hStepSize[step]) % 31
        treeCountProduct = treeCountProduct * treeCount
    return treeCountProduct
import sys
from utils import getFilename, readFile

filename = getFilename(sys.argv)
output = readFile(filename, str)
output = [list(o) for o in output]
originalOutput = output[:]


def getResult(output, printOutput=True):
    result = []
    for o in output:
        result.append("".join(o))
        if printOutput:
            print(result[-1])
    result = "".join(result)
    return result


### PART 1 ###
def adjOccupiedNum(output, row, col):
    # collect the coordinates of all in-bounds neighbours
    check = []
    for i in range(max(row - 1, 0), min(row + 2, len(output))):
        for j in range(max(col - 1, 0), min(col + 2, len(output[0]))):
            if (i, j) != (row, col):
                check.append((i, j))
slash = '/'
subclass = './subclasses'
test = './../dataset/test'
farm = './../dataset/farm'
bird = './../dataset/birds'
animal = './../dataset/animals'

TP = 0
FP = 0
TN = 0
FN = 0

# Dictionary that stores the list of subclasses of each class
subclass_dict = {}
for file in os.listdir(subclass):
    tempList = utils.readFile(subclass + slash + file)
    subclass_dict[file.split('.')[0]] = tempList

false_subclass = utils.readFile('./farm.txt')


def getClass(imagePath):
    tempvgg = vggModel(imagePath)
    tempres = resModel(imagePath)
    subclass_list = []
    for tup in tempvgg:
        subclass_list.append(tup[1])
    for tup in tempres:
        subclass_list.append(tup[1])  # assumed to mirror the vgg loop; the excerpt is cut off here