def addDownload2(self):
		start=timeit.default_timer()
		global urlname
		global urlsize
		global urlstatus
		global urltype
		global model
		global idx
		url=str(self.dialog.lineEdit_6.text())
		urlname = url.split('/')[-1]
		response=urllib2.urlopen(url)
		self.meta=response.info()
		urlsize=int(self.meta.getheaders('Content-Length')[0])
		if urlsize < 1000:
			urlsize = str(urlsize) + " B"
		elif urlsize < 1000000:
			urlsize = str(urlsize / 1000.0) + " KB"
		elif urlsize < 10**9:
			urlsize = str(1.0 * urlsize / 10**6) + " MB"
		else:
			urlsize = str(1.0 * urlsize / 10**9) + " GB"
		self.dialog.label_17.setText(urlsize)
		self.dialog.label_68.setText(urlname)
		f=open(urlname,'wb')
		f.write(response.read())
		stop=timeit.default_timer()
		speed=float(urlsize.split(' ')[0])/(stop-start)
		speed=str(speed) + " " + urlsize.split(' ')[-1] + "/s"
		self.dialog.label_67.setText("100 %")
		self.dialog.label_69.setText(speed)
		self.dialog.label_70.setText(str(stop-start))
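
# Note: a small standalone helper equivalent to the size bucketing above
# (a sketch, not part of the original class): pick the largest unit whose
# threshold the byte count reaches, falling back to plain bytes.
def format_size(num_bytes):
    for unit, factor in (("GB", 10**9), ("MB", 10**6), ("KB", 10**3)):
        if num_bytes >= factor:
            return "%.2f %s" % (num_bytes / float(factor), unit)
    return "%d B" % num_bytes

# format_size(2500000) -> '2.50 MB'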
Example #2
    def test(self, view):
        """
        Calls the given view and measures the time for it to return. The
        garbage collector is disabled during execution.
        """
        gc_old = gc.isenabled()
        gc.disable()
        try:
            start = timeit.default_timer()
            if view.method == 'GET':
                response = self.client.get(view.url, view.data)
            elif view.method == 'POST':
                response = self.client.post(view.url, view.data)
            else:
                raise ValueError('Unknown view method: %s' % view.method)

            end = timeit.default_timer()
            # Return result in milliseconds
            time_ms = (end - start) * 1000
            # Try to get version information
            version = subprocess.check_output(['git', 'describe'])

            from .models import TestResult
            return TestResult(view=view, time=time_ms, result=response,
                              result_code=response.status_code, version=version)
        finally:
            if gc_old:
                gc.enable()
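
# Note: the same GC-off timing pattern in isolation (a generic sketch, not tied
# to the Django test client used above): collection is disabled so a GC pause
# does not distort the measurement, and the previous state is always restored.
import gc
import timeit

def time_call(fn, *args, **kwargs):
    gc_old = gc.isenabled()
    gc.disable()
    try:
        start = timeit.default_timer()
        result = fn(*args, **kwargs)
        elapsed_ms = (timeit.default_timer() - start) * 1000
        return result, elapsed_ms
    finally:
        if gc_old:
            gc.enable()

# Usage: _, ms = time_call(sorted, list(range(100000)))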
Example #3
 def measured(self, msg, *args, **kwargs):
     self.info(msg + '..', *args, **kwargs)
     start_time = timeit.default_timer()
     yield
     end_time = timeit.default_timer()
     self.info("%s took %.2fs." % (msg, end_time - start_time),
             *args, **kwargs)
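
# Note: the bare `yield` above suggests this method is wrapped with
# contextlib.contextmanager elsewhere in its class; a minimal self-contained
# sketch of the same pattern with a module-level logger (names assumed):
import logging
import timeit
from contextlib import contextmanager

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

@contextmanager
def measured(msg):
    log.info("%s..", msg)
    start_time = timeit.default_timer()
    yield
    log.info("%s took %.2fs.", msg, timeit.default_timer() - start_time)

# Usage:
# with measured("building index"):
#     sum(range(10**6))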
Example #4
    def __download_range(self, k, dst):
        try:
            _, ext = os.path.splitext(dst)
            ds = []
            parts = []

            logging.info("Download %s start", k.name)

            for startByte in range(0, k.size, self.splitMB):
                output_part = self.new_temp_file(suffix=ext)
                parts.append(output_part)

                endByte = min(startByte + self.splitMB - 1, k.size)

                logging.debug(
                    "deferToThreadPool %s start=%d end=%d size=%d cnt=%d",
                    k.name,
                    startByte,
                    endByte,
                    endByte - startByte,
                    len(ds),
                )

                d = twisted.internet.threads.deferToThreadPool(
                    reactor,
                    reactor.getThreadPool(),  # @UndefinedVariable
                    self.__downloadOne,
                    k,
                    startByte,
                    endByte,
                    output_part,
                    len(ds),
                )
                ds.append(d)

            if os.path.exists(dst):
                os.remove(dst)
            fout = file(dst, "wb")
            start = timeit.default_timer()
            for cnt, p in enumerate(parts):
                yield ds[cnt]
                shutil.copyfileobj(file(p, "rb"), fout)

                size = min(k.size, (cnt + 1) * self.splitMB)
                elapsed = timeit.default_timer() - start
                speedstr = formatFileSize(size / elapsed)
                sizestr = formatFileSize(size)
                percent = (float(cnt + 1) / len(parts)) * 100.0
                logging.info(
                    "%03d/%03d (%.2f%%) speed=%s/s, elapsed=%.2f, size=%s",
                    cnt + 1,
                    len(parts),
                    percent,
                    speedstr,
                    elapsed,
                    sizestr,
                )
        except Exception:
            logging.error("download error", exc_info=True)
            raise
Example #5
def main():
    start_time = timeit.default_timer()

    proxies = []

    targets = ['http://www.google-proxy.net/','http://free-proxy-list.net/']

    for i in range(len(targets)):
        proxy = proxy_scraper(targets[i])

        for u in range(len(proxy)):
            proxy_found = str(proxy[u]['ip'])+":"+str(proxy[u]['port'])
            if proxy_found not in proxies:
                if not is_proxy_existed(proxy[u]['ip']):
                    print proxy[u]['ip'] +" - "+ proxy[u]['port'] +" - "+ proxy[u]['hostname']
                    create_proxy(proxy[u]['ip'], proxy[u]['port'], proxy[u]['hostname'], proxy[u]['service'], proxy[u]['latitude'], proxy[u]['longitude'], proxy[u]['city'], proxy[u]['country'])
                    proxies.append(proxy_found)

    # save to a file
    file_name = "data_proxies.cfg"
    write_file( file_name, "\n".join(proxies) )
    print("\n%s proxies found. File saved. You can find it under '%s'." % (len(proxies), file_name))

    # measure time
    print "\nElapsed time: %d sec" % (timeit.default_timer() - start_time)
Example #6
def create_features(features):
    import timeit
    source = load_source()
    start = timeit.default_timer()
    compute_features(source, features)
    end = timeit.default_timer()
    print("save all features takes ", (end-start))
Example #7
 def __execEvent__(self, eventName, ntime, commandHandler):
     last = self.__events__[eventName]["lastExecTime"]
     timeInterval = self.__events__[eventName]["timeInterval"]
     
     if ntime - last >= timeInterval:
         start = default_timer()
         
         self.__events__[eventName]["function"](commandHandler, self.__events__[eventName]["channels"])
         
         timeTaken = default_timer() - start
     
         stats = self.__events__[eventName]["stats"]
         
         if stats["average"] == None:
             stats["average"] = timeTaken
             stats["min"] = timeTaken
             stats["max"] = timeTaken
         else:
             stats["average"] = (stats["average"]+timeTaken) / 2.0
             
             if timeTaken < stats["min"]:
                 stats["min"] = timeTaken
                 
             if timeTaken > stats["max"]:
                 stats["max"] = timeTaken
         
         self.__events__[eventName]["lastExecTime"] = time.time()
def main():
    print "[Facebook Album Downloader v1]"
    start = timeit.default_timer()

    # hide images
    prefs = {"profile.managed_default_content_settings.images": 2}
    extensions = webdriver.ChromeOptions()
    extensions.add_experimental_option("prefs", prefs)
    browser = webdriver.Chrome(executable_path="chromedriver", chrome_options=extensions)

    findAlbum(browser)
    createAlbumPath()

    queue = Queue()

    for x in range(max_workers):
        worker = DownloadWorker(queue)
        worker.daemon = True
        worker.start()

    print "[Getting Image Links]"
    linkImages = getImageLinks(browser)
    print "[Found: " + str(len(linkImages)) + "]"

    for fullRes in linkImages:
        queue.put(fullRes)

    print "[Downloading...]"
    queue.join()

    browser.quit()

    stop = timeit.default_timer()
    print "[Time taken: %ss]" % str(stop - start)
    raw_input("Press any key to continue...")
Example #9
def read_features(features):
    """
        read all the features in the 'features' array and return a numpy array
        currently only compute the grand mean and std
    """
    start = timeit.default_timer()
    x = []
    y = []
    for fn in glob.glob(os.path.join(FT_DIR, "*.npy")):
        name_start = fn.rfind('/')
        name_end = fn.rfind('.')
        ext = fn[name_start + 1:name_end]
        genre, _= ext.split('_')
        data = np.load(fn)
        surface_ft = data[:-1] #5 features
        ft_vec = [np.mean(ft) for ft in surface_ft] + [np.std(ft) for ft in surface_ft]

        ceps = data[-1]#mfcc features
        cep_len = len(ceps)
        ft_vec += np.mean(ceps[int(cep_len / 10.):int(cep_len * 9 / 10.)], axis=0).tolist()
        x.append(ft_vec)
        y.append(GENRE_DICT[genre])

    end = timeit.default_timer()
    print("reading all features takes: ", (end - start))

    return np.array(x), np.array(y)
    def index_project(self, project_name):
        project_data = self.watcher.projects[project_name]["project_data"]
        cfc_folders = project_data.get(self.folder_key, [])
        mappings = project_data.get("mappings", [])
        project_file_dir = os.path.dirname(project_name)

        if len(cfc_folders) == 0:
            return

        start_time = timeit.default_timer()
        index = {}

        print("CFML: indexing components in project '" + project_name + "'")

        for cfc_folder in sorted(cfc_folders, key=lambda d: d["path"]):
            root_path = utils.normalize_path(cfc_folder["path"], project_file_dir)
            path_index = self.parser.parse_directory(root_path)
            index.update(path_index)

        self.data[project_name] = {
            "index": index,
            "cache": {file_path: {} for file_path in index}
        }
        self.build_project_data(project_name)

        index_time = timeit.default_timer() - start_time
        message = "CFML: indexing components in project '{}' completed - {} files indexed in {:.2f} seconds"
        print(message.format(project_name, str(len(index)), index_time))
        self.notify_listeners(project_name)
Example #11
    def trim_data(crime_data, part, total_parts):
        print 'Trimming unnecessary data...',
        time1 = tm.default_timer()
        crime_data = crime_data[crime_data['YEAR'] >= 2006]
        crime_data = crime_data[crime_data['YEAR'] <= 2015]
        crime_data = crime_data[pd.notnull(crime_data['NEIGHBOURHOOD'])]
        crime_data = crime_data.drop('HUNDRED_BLOCK', axis=1)
        crime_data = crime_data.sort_index()


        if TEST_VAL:
            print 'Taking subset of crime data (first 1005 rows)...',
            crime_data = crime_data.head(1005)

        if part is not None and total_parts is not None:
            start_index = int(1.0*(part-1)/total_parts*crime_data['YEAR'].count())
            end_index = int(1.0*part/total_parts*crime_data['YEAR'].count())

            if part == total_parts: end_index = crime_data['YEAR'].count()

            crime_data = crime_data[start_index:end_index]

            print 'Start index, end index, size:',start_index,end_index, crime_data['YEAR'].count()

        print 'Finished'
        print 'Time taken:', tm.default_timer()-time1, ' seconds\n'
        return crime_data
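
# Worked example of the partition arithmetic above: with 1000 rows and
# total_parts=4, part=2 covers indices int(1*1000/4)=250 up to (but not
# including) int(2*1000/4)=500, i.e. the second quarter of the data.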
Example #12
def launch_jobs(quandl_codes, num_workers, calc_date, authtoken="", freq='M', span=60):
    job_queue = Queue.Queue()
    for b in quandl_codes:
        job_queue.put(b)
    print "Length %d"%job_queue.qsize()
    
    thlist = []
    s_time  = timeit.default_timer()
    fp = open("output.csv","w")
    heading = "Ticker, Date, "+",".join(Worker._itemlist)+"\n"
    fp.write(heading)
    
    s_date = dutil.shift_months(calc_date, -(span+6))
    trim_start = s_date.strftime('%Y-%m-%d')
    trim_end  = calc_date.strftime('%Y-%m-%d')

    calc_param = {"calc_date": calc_date, "freq": freq, "span": span}
    for i in range(num_workers):
        th = Worker(job_queue, trim_start, trim_end, calc_param, authtoken, fp)
        th.daemon = True
        th.start()
        thlist.append(th)
    print "Finished launching jobs"
    e_time = timeit.default_timer()
    print "Time taken ",(e_time - s_time)
    
    # block until the queue is empty
    job_queue.join()
Example #13
def spawn_runpy(cp, wait=60, cb=check_rst):
    "as decorator to run job"
    global WAITQ, RUNQ, CFG
    pool = Pool(processes=CFG['MAXJOBS'])
    while len(WAITQ) > 0 or len(RUNQ) > 0:
        if len(RUNQ) <= CFG['MAXJOBS'] and len(WAITQ) > 0:
            path, test = WAITQ.pop()
            rst = pool.apply_async(call_runpy, (cp, path, test,))
            RUNQ.append((rst, test, timeit.default_timer()))
        else:
            for r in RUNQ:
                usec = float("%.2f" %(timeit.default_timer()-r[2]))
                if r[0].ready() and r[0].successful():
                    print "[{0}] success used {1} usec".format(r[1], usec)
                    RUNQ.remove(r)
                    if cb:
                        cb(r[1], 'pass', usec)
                else:
                    if usec > CFG['TIMEOUT']:
                        print "[{0}] unsuccess used timeout {1} usec".format(r[1], usec)
                        r[0].terminate()
                        if cb:
                            cb(r[1], 'fail', usec)

        time.sleep(float(wait))
Example #14
    def test_exercise_6(self):
        con = self.con
        con.isolation_level = None
        cur = con.cursor()

        N = 30000

        #############################
        # Exercise 6
        #
        # Change the following schema to include an index on column "a".
        cur.execute('CREATE TABLE "numbers" (a INTEGER)')
        #
        #
        #############################

        rows = []
        for i in range(0, N):
            rows.append( (i,) )
        
        cur.executemany('INSERT INTO "numbers" VALUES (?)', rows)
 
        start_time = timeit.default_timer()
        cur.execute('select min(a) from numbers')
        print("exercise_6: That took %f ms." % ((timeit.default_timer() - start_time) * 1000,))

        data = cur.fetchall()
        cur.close()
        self.assertTrue(data[0][0] == 0)
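
# Note: one possible answer to Exercise 6 above (a sketch, not necessarily the
# course's intended solution): an index on "a" lets SQLite resolve min(a) from
# the index instead of scanning the whole table.
import sqlite3
import timeit

con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute('CREATE TABLE "numbers" (a INTEGER)')
cur.execute('CREATE INDEX "numbers_a_idx" ON "numbers" (a)')
cur.executemany('INSERT INTO "numbers" VALUES (?)', [(i,) for i in range(30000)])
t0 = timeit.default_timer()
cur.execute('select min(a) from numbers')
print("with index: %f ms" % ((timeit.default_timer() - t0) * 1000))
cur.close()
con.close()
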
def evaluate(im, algo, gt_illuminant, i, range_thresh, bin_num, dst_folder):
    new_im = None
    start_time = timeit.default_timer()
    if algo=="grayworld":
        new_im = cv2.xphoto.autowbGrayworld(im, 0.95)
    elif algo=="nothing":
        new_im = im
    elif algo=="learning_based":
        new_im = cv2.xphoto.autowbLearningBased(im, None, range_thresh, 0.98, bin_num)
    elif algo=="GT":
        gains = gt_illuminant / min(gt_illuminant)
        g1 = float(1.0 / gains[2])
        g2 = float(1.0 / gains[1])
        g3 = float(1.0 / gains[0])
        new_im = cv2.xphoto.applyChannelGains(im, g1, g2, g3)
    time = 1000*(timeit.default_timer() - start_time) #time in ms

    if len(dst_folder)>0:
        if not os.path.exists(dst_folder):
            os.makedirs(dst_folder)
        im_name = ("%04d_" % i) + algo + ".jpg"
        cv2.imwrite(os.path.join(dst_folder, im_name), stretch_to_8bit(new_im))

    #recover the illuminant from the color balancing result, assuming the standard model:
    estimated_illuminant = [0, 0, 0]
    eps = 0.01
    estimated_illuminant[2] = np.percentile((im[:,:,0] + eps) / (new_im[:,:,0] + eps), 50)
    estimated_illuminant[1] = np.percentile((im[:,:,1] + eps) / (new_im[:,:,1] + eps), 50)
    estimated_illuminant[0] = np.percentile((im[:,:,2] + eps) / (new_im[:,:,2] + eps), 50)

    res = np.arccos(np.dot(gt_illuminant,estimated_illuminant)/
                   (np.linalg.norm(gt_illuminant) * np.linalg.norm(estimated_illuminant)))
    return (time, (res / np.pi) * 180)
Example #16
  def execute(self):
    start_time = timeit.default_timer()
    response = self.svc.call()

    end_time = timeit.default_timer()
    self.elapsed_time = end_time - start_time
    return self.validate(response)
Example #17
    def runTestCode(self):
        """
        This function ties into the debug menu. It is meant to allow execution
        of some test code. Feel free to change the contents of this function.
        """
        start = timeit.default_timer()

        monsters = []
        lib = Libraries.MonsterLibrary()

        stop = timeit.default_timer()
        time = stop - start
        print "Created library in " + str(time) + " seconds"

        for i in range(0, 10000):
            myRandom = lib.getRandomMonster(random.randint(0, 80))
            monsters.append(myRandom)

        # lib = Libraries.ItemLibrary()
        # myItem = lib.createItem('heal')
        # print myItem
        # myItem = lib.createItem('sword')
        # print myItem
        # myItem = lib.createItem('cloak')
        # print myItem
        # myItem = lib.createItem('fireball')
        # print myItem

        stop = timeit.default_timer()
        time = stop - start
        print "Created " + str(len(monsters)) + " monsters in " + str(time) + " seconds"
Example #18
    def scan_vocab(self, documents, progress_per=10000, trim_rule=None):
        logger.info("collecting all words and their counts")
        document_no = -1
        total_words = 0
        min_reduce = 1
        interval_start = default_timer() - 0.00001  # guard against next sample being identical
        interval_count = 0
        vocab = defaultdict(int)
        for document_no, document in enumerate(documents):
            if document_no % progress_per == 0:
                interval_rate = (total_words - interval_count) / (default_timer() - interval_start)
                logger.info("PROGRESS: at example #%i, processed %i words (%i/s), %i word types, %i tags",
                            document_no, total_words, interval_rate, len(vocab), len(self.docvecs))
                interval_start = default_timer()
                interval_count = total_words
            document_length = len(document.words)

            for tag in document.tags:
                self.docvecs.note_doctag(tag, document_no, document_length)

            for word in document.words:
                vocab[word] += 1
            total_words += len(document.words)

            if self.max_vocab_size and len(vocab) > self.max_vocab_size:
                utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
                min_reduce += 1

        logger.info("collected %i word types and %i unique tags from a corpus of %i examples and %i words",
                    len(vocab), len(self.docvecs), document_no + 1, total_words)
        self.corpus_count = document_no + 1
        self.raw_vocab = vocab
Example #19
    def run(self):
        try:
            self.running = True
            self.logger.info("[Relay] Relay controller activated in "
                             "{}ms".format((timeit.default_timer()-self.thread_startup_timer)*1000))
            while (self.running):
                current_time = datetime.datetime.now()
                for relay_id in self.relay_id:
                    if (self.relay_on_until[relay_id] < current_time and
                            self.relay_on_duration[relay_id] and 
                            self.relay_pin[relay_id]):
                        
                        # Use threads to prevent a slow execution of a
                        # process that could slow the loop
                        turn_relay_off = threading.Thread(
                            target=self.relay_on_off,
                            args=(relay_id, 'off',))
                        turn_relay_off.start()

                        if self.relay_last_duration[relay_id] > 0:
                            write_db = threading.Thread(
                                target=write_influxdb,
                                args=(self.logger, INFLUXDB_HOST,
                                      INFLUXDB_PORT, INFLUXDB_USER,
                                      INFLUXDB_PASSWORD, INFLUXDB_DATABASE,
                                      'relay', relay_id, 'duration_sec',
                                      float(self.relay_last_duration[relay_id]),))
                            write_db.start()

                time.sleep(0.01)
        finally:
            self.all_relays_off()
            self.running = False    
            self.logger.info("[Relay] Relay controller deactivated in "
                             "{}ms".format((timeit.default_timer()-self.thread_shutdown_timer)*1000))
Example #20
 def train(self, examples, cv_extract, epochs, learning_rate):
   """
   Specializes the network for prediction on the given examples, using the
   given center extract function, the given number of epochs, and the given
   learning rate.
   """
   input = T.vector(name="training_input", dtype=theano.config.floatX)
   tf = self.get_specialization_function(input, cv_extract, learning_rate)
   indices = list(range(examples.get_value(borrow=True).shape[0]))
   start_time = timeit.default_timer()
   # TODO: batches?
   for epoch in range(epochs):
     self.rng.shuffle(indices)
     costs = []
     for j in indices:
       cost = tf(examples.get_value(borrow=True)[j].reshape(-1))
       costs.append(cost)
     debug(
       "... [{}] epoch {: 3d} done {} ...".format(
         str(datetime.timedelta(seconds=timeit.default_timer()-start_time)),
         epoch + 1,
         "(min/avg cost {:0.3f}/{:0.3f})".format(
            float(min(costs)),
            float(sum(costs) / float(len(costs)))
         )
       )
     )
Example #21
def simulate(new_N = N, new_R = R, new_D = D):
	global N 
	N = new_N
	global R 
	R = new_R
	global D 
	D = new_D
	global distance_arr
	distance_arr = [([0] * N) for i in xrange(N)]
	global sensor_network
	sensor_network = [Sensor(i) for i in range(N)]
	
	for x in xrange(N):
		for y in xrange(N):
			if x != y and distance_arr[x][y] == 0:
				distance_arr[x][y] = sensor_distance(sensor_network[x].position, sensor_network[y].position)
				distance_arr[y][x] = distance_arr[x][y]

	start = timeit.default_timer()

	[s.start() for s in sensor_network]
	[s.join() for s in sensor_network]

	stop = timeit.default_timer()
	
	return stop - start
Example #22
    def evaluate(self, p, sim, plt):
        start = timeit.default_timer()
        sim.run(p.T)
        end = timeit.default_timer()
        speed = p.T / (end - start)

        data = sim.data[self.p_ens]

        last = []
        for row in data.T:
            nz = np.nonzero(row>0.05)[0]
            if len(nz) == 0:
                last.append(0)
            else:
                last.append(nz[-1])
        time_to_inhibit = np.array(last)*p.dt

        if plt:
            plt.plot(sim.trange(), sim.data[self.p_ens])
            for t in time_to_inhibit:
                plt.axvline(t)
            plt.axhline(0.05, linestyle='--', c='k')
            plt.xlabel('time (s) with increasing inhibition')
            plt.ylabel('decoded output')

        return dict(time_to_inhibit=np.mean(time_to_inhibit),
                    speed=speed)
Example #23
def load_indicators_to_mongo_zh(is_incremental):
    print("start loading indicator data(zh) from JSON file to MongoDB...")
    all_start = timeit.default_timer()
    static = Static()
    f = io.open(static.output_folder + '/worldbank_wdi_indicators_zh.json', 'r', encoding='utf8', errors='ignore')
    json_str = f.readline()
    indicator_array = json.loads(json_str)
    f.close()
    client = MongoClient(static.mongo_url, static.mongo_port)
    db = client[static.database_name]
    ## print(db.collection_names())
    indicator_col = db[static.indicator_col_name]
    if not is_incremental:
        indicator_col.drop()
    for ind in indicator_array:
        indicator_key = ind['id'].replace('.', '_') + '_ZH'
        data_type = 'number'
        if(ind['name'].find('百分比') > -1):
            data_type = 'percentage'
        topics = []
        for topic in ind['topics']:
            topics.append(topic['value'])
        indicator_rec = {
            'indicator_key': indicator_key,
            'original_id': ind['id'],
            'indicator_text': ind['name'],
            'data_type': data_type,
            'sourceOrganization': ind['sourceOrganization'],
            'sourceNote': ind['sourceNote'],
            'topics': topics,
            'data_source': '世界发展指标',
            'dimension': [
                {'dimension_key': 'year', 'dimension_text': '年'},
                {'dimension_key': 'region', 'dimension_text': '区域'},
                {'dimension_key': 'country', 'dimension_text': '国家'}
            ]
        }
        pk = indicator_col.insert(indicator_rec)
        print(indicator_key + ' ' + ind['name'] + ' inserted.')
    print("job is complete.")
    print("total records: " + str(indicator_col.count()))
    print("total time cost: " + str(round(timeit.default_timer() - all_start)) + 's')
Example #24
 def _run_analyzers_on_event(self):
     '''Run all analysers on the current event, self.event. 
     Returns a tuple (success?, last_analyzer_name).
     '''
     for i,analyzer in enumerate(self._analyzers):
         if not analyzer.beginLoopCalled:
             analyzer.beginLoop(self.setup)
         start = timeit.default_timer()
         if self.memReportFirstEvent >=0 and iEv >= self.memReportFirstEvent:           
             memNow=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
             if memNow > self.memLast :
                print  "Mem Jump detected before analyzer %s at event %s. RSS(before,after,difference) %s %s %s "%( analyzer.name, iEv, self.memLast, memNow, memNow-self.memLast)
             self.memLast=memNow
         ret = analyzer.process( self.event )
         if self.memReportFirstEvent >=0 and iEv >= self.memReportFirstEvent:           
             memNow=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
             if memNow > self.memLast :
                print "Mem Jump detected in analyzer %s at event %s. RSS(before,after,difference) %s %s %s "%( analyzer.name, iEv, self.memLast, memNow, memNow-self.memLast)
             self.memLast=memNow
         if self.timeReport:
             self.timeReport[i]['events'] += 1
             if self.timeReport[i]['events'] > 0:
                 self.timeReport[i]['time'] += timeit.default_timer() - start
         if ret == False:
             return (False, analyzer.name)
     return (True, analyzer.name)
Example #25
def load_rowdata_to_mongo_zh(is_incremental):
    print("start loading row data(zh) from JSON file to MongoDB...")
    all_start = timeit.default_timer()
    static = Static()
    bydim_dir = static.output_folder + static.dataset_bydim_folder
    
    client = MongoClient(static.mongo_url, static.mongo_port)
    db = client[static.database_name]
    dataset_col = db[static.dataset_col_name]
    if not is_incremental:
        dataset_col.drop()

    file_path_array = []
    for idx, file in enumerate(os.listdir(bydim_dir)):
        file_path = os.path.join(bydim_dir, file)
        if os.path.isfile(file_path):
            file_path_array.append(file_path)
    print(str(len(file_path_array)) + " files are loaded")

    counter = []
    mapfunc = partial(insert_by_dim, counter=counter, dataset_col=dataset_col, all_start=all_start)
    pool = ThreadPool(12)
    pool.map(mapfunc, file_path_array)
    pool.close() 
    pool.join()
    
    print("All the threads are completed. Total number is " + str(len(counter)) + "\n")
    print("total time cost: " + str(round(timeit.default_timer() - all_start)) + 's')
Example #26
def loop_sd_mean(alphabet):
	print("======== sd-mean test===========")
	start = timeit.default_timer()
	count = 0
	letters_number_list = []
	entropy_list = []

	for i in list(range(1,101)): # this is  sd 
		alphabet1 = eliminate_sd(alphabet,i)
		for j in list(range(1,101)): # this is mean
			alphabet2 = eliminate_mean(alphabet1,j)
			letters_number = len(alphabet2)
			letters_number_list.append((i,j,letters_number))

			balanced_alphabet = rebalance(alphabet2) 
			entropy = calculate_entropy(balanced_alphabet)
			entropy_list.append((i,j,entropy))

			count = count+1
			print(count)


	stop  = timeit.default_timer()
	time = (stop - start)
	print (letters_number_list)
	print (entropy_list)
	print("======== sd-mean test===========")
	print('Running Time (s): %f' %time)
Example #27
def worker(F, chargers, sensors, p_list, sensors_p, p_list_p):
    """worker function, used to create processing"""
    result = {}

    tic = timeit.default_timer()
    anser = reconfiguration.iaa.solution(chargers, sensors, p_list, args['B'], sensors_p, p_list_p, F, args['p_min'])
    toc = timeit.default_timer()
    result['IAA'] = (toc - tic, anser)
    if DEBUG:
        print "============================================="
        print "#               solution IAA                #"
        print "============================================="
        pprint(anser)

    tic = timeit.default_timer()
    anser = solution.solutionOpt.solution(chargers, sensors_p, p_list_p)
    toc = timeit.default_timer()
    result['Opt'] = (toc - tic, anser)
    if DEBUG:
        print "============================================="
        print "#               solution Opt                #"
        print "============================================="
        pprint(anser)

    return result
Example #28
def main():
    """
    """
    logging.info("Reading file:%s", "data/sample.avi")
    vid = AoRecording.AoRecording(filepath="data/sample.avi")
    vid.load_video()
    logging.info("Starting parallel processing")
    tic = timeit.default_timer()
    vid.filter_frames()
    vid.fixed_align_frames()
    vid.complete_align_parallel()
    vid.create_average_frame()
    vid.create_stdev_frame()
    toc = timeit.default_timer()
    print "Parallel Process took {}:".format(toc - tic)

    vid.create_stdev_frame()

    logging.info("writing output")
    vid.write_video("output/output_parallel.avi")
    vid.write_average_frame("output/lucky_average_parallel.png")
    vid.write_frame("output/lucky_stdev.png", "stdev")

    logging.info("Starting serial processing")
    tic = timeit.default_timer()
    vid.filter_frames()
    vid.fixed_align_frames()
    vid.complete_align()
    vid.create_average_frame()
    toc = timeit.default_timer()
    print "Serial Process took {}:".format(toc - tic)

    logging.info("writing output")
    vid.write_video("output/output_serial.avi")
    vid.write_frame("output/lucky_average_serial.png", "average")
Example #29
    def analyze_files(self, iterCount, loci_classes, adapt_threshold):
        Rmodel = VRmodel.VregMRmodel(iterCount, loci_classes, adapt_threshold)
        print "len(Rmodel.rfmodels)=", len(Rmodel.rfmodels)

        ofile = open("bkg_out.dat","a+")
        Rmodel.set_bckgoutfile( ofile )
        
        for species in self.speciesList:
            fbar= self.S[species]["WGS"]
            print fbar
            outFile = self.outDir + os.path.basename(fbar).replace(".fasta", "_"+str(iterCount)+"_outRF.fasta")
            ofile = open(outFile,"w")
            Rmodel.set_outfile( ofile )

            fb = self.outDir + os.path.basename(fbar).replace(".fasta", "_"+str(iterCount)+"_exon.fasta")  
            exfile1 = open(fb,"w")            
            Rmodel.set_exon_outfiles( exfile1 ) 

            start_time = timeit.default_timer()
            gene_cnt=0


            for strand in [1, -1]:
                qbar=deepcopy(self.contigs)
                print "STRAND=", strand
                for record in SeqIO.parse(fbar, "fasta"):
                    if self.check_contigs: 
                        if ( record.id.split("|")[3] not in self.contigs):
                            continue
                    print "record.id=", record.id
                    print "cnts=",record.id.split("|")[3]
                    print "qbar=", qbar
                    if self.check_contigs: 
                        qbar.remove(record.id.split("|")[3])
                    if strand == 1:
                        seq=record.seq
                    else:
                        seq=record.seq.reverse_complement()

                    Rmodel.set_record(record.id, record.name, record.description)
                    seq_size=len(seq)

                    res= self.mapper( divide_work(seq) )

                    """
                    print "len(res)=", len(res)
                    for ix in range(2):
                        print res[ix][0], res[ix][1], type(res[ix][2])
                    """

                    Elist=Rmodel.exon_MRprobabilities(res)
                    gene_cnt = Rmodel.V_exon_model(gene_cnt, seq, strand, Elist)
                    #res=None
                    #Elist=None
                    if len(qbar)==0: 
                        break

            ofile.close()
            elapsed = timeit.default_timer() - start_time
            print "ELAPSED TIME =", elapsed
Example #30
 def pretrain(self, examples, epoch_counts, corruption_rates, learning_rates):
   """
   Trains the network for autoencoding on the given examples, given lists of
   epoch counts, corruption rates, and learning rates each equal in length to
   the number of layers in the stack.
   """
   tfs = self.get_training_functions(corruption_rates, learning_rates)
   indices = list(range(examples.get_value(borrow=True).shape[0]))
   start_time = timeit.default_timer()
   for i in range(len(self.layers)):
     # TODO: batches?
     for epoch in range(epoch_counts[i]):
       self.rng.shuffle(indices)
       costs = []
       for j in indices:
         cost = tfs[i](examples.get_value(borrow=True)[j].reshape(-1))
         costs.append(cost)
       debug(
         "... [{}] epoch {: 3d} at layer {: 2d} done {} ...".format(
           str(datetime.timedelta(seconds=timeit.default_timer()-start_time)),
           epoch + 1,
           i,
           "(min/avg cost {:0.3f}/{:0.3f})".format(
             float(min(costs)),
             float(sum(costs)/float(len(costs))),
           )
         )
       )
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.utils import resample
from sklearn.feature_selection import SelectFromModel
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer

import pandas as pd
import timeit

# In[2]: Import data

startzeitDaten = timeit.default_timer()

data = pd.read_csv(r"Data\SP500_data_new.csv"
                   ,parse_dates = ["adate", "qdate", "public_date"], dayfirst = True)#, index_col=["gvkey", "datadate"]) 
data_NaN = data.dropna()
data_y = data.dropna(subset =["splticrm"])

Names1 = pd.read_excel(r"Data\Names1.xlsx", header = 0)
Names1 = Names1.drop(["Data Type", "Help"], axis = 1)
Names1.columns = ["Name", "ExName"]
Names2 = pd.read_excel(r"Data\Names2.xlsx", header = 0)
Names2 = Names2.drop(["Data Type","Help"], axis = 1)
Names2.columns = ["Name", "ExName"]

features1RF_mean = pd.read_csv(r"Data\RF1_mean.csv", header = 0).dropna()
features1RF_mean.columns = ["Ort","Name", "Wert"]
Example #32
 def __exit__(self, _1, _2, _3):
     self.t_end = timeit.default_timer()
     self.dt = self.t_end - self.t_start
Example #33
 def __enter__(self):
     self.t_start = timeit.default_timer()
     return self
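
# Note: Examples #32 and #33 are the two halves of a timing context manager;
# a minimal self-contained version of the same idea (the class name Timer is
# assumed here, the original class name is not shown):
import timeit

class Timer(object):
    def __enter__(self):
        self.t_start = timeit.default_timer()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.t_end = timeit.default_timer()
        self.dt = self.t_end - self.t_start

# Usage:
# with Timer() as t:
#     sum(range(10**6))
# print("took %.4fs" % t.dt)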
Example #34
g.add_node(5, pos = (2,2))
g.add_node(6, pos = (3,1))
g.add_node(7, pos = (4,1))

# create the edges in the graph
g.add_edge(1,2, weight = 50)
g.add_edge(1,3, weight = 50)
g.add_edge(2,4, weight = 10)
g.add_edge(2,5, weight = 20)
g.add_edge(4,6, weight = 20)
g.add_edge(5,6, weight = 10)
g.add_edge(6,7, weight = 20)
g.add_edge(3,7, weight = 50)

if __name__ == "__main__":    
    startTime = timeit.default_timer()
    path1 = bfs(g,1,7,0)[0]
    cost1 = bfsPathCost(g, 1, 7)
    
    endTime = timeit.default_timer()
    calculations = bfs(g,1,7,0)[1]
    # find the runtime of the program in microseconds
    runTime = (endTime-startTime)*10**6
    
    print('The vertices found by BFS: ' + str(path1))
    print('Cost of BFS: ' + str(cost1))
    print('Runtime in microseconds: ' + str(runTime))



Example #35
def process_dump(
    input_file,
    template_file,
    out_file,
    file_size,
    file_compress,
    process_count,
    html_safe,
):
    """
    :param input_file: name of the wikipedia dump file; '-' to read from stdin
    :param template_file: optional file with template definitions.
    :param out_file: directory where to store extracted data, or '-' for stdout
    :param file_size: max size of each extracted file, or None for no max (one file)
    :param file_compress: whether to compress files with bzip.
    :param process_count: number of extraction processes to spawn.
    """
    global knownNamespaces
    global templateNamespace, templatePrefix
    global moduleNamespace, modulePrefix

    urlbase = ""  # This is obtained from <siteinfo>

    input = decode_open(input_file)

    # collect siteinfo
    for line in input:
        line = line  # .decode('utf-8')
        m = tagRE.search(line)
        if not m:
            continue
        tag = m.group(2)
        if tag == "base":
            # discover urlbase from the xml dump file
            # /mediawiki/siteinfo/base
            base = m.group(3)
            urlbase = base[:base.rfind("/")]
        elif tag == "namespace":
            knownNamespaces.add(m.group(3))
            if re.search('key="10"', line):
                templateNamespace = m.group(3)
                templatePrefix = templateNamespace + ":"
            elif re.search('key="828"', line):
                moduleNamespace = m.group(3)
                modulePrefix = moduleNamespace + ":"
        elif tag == "/siteinfo":
            break

    if expand_templates:
        # preprocess
        template_load_start = default_timer()
        if template_file and os.path.exists(template_file):
            logging.info(
                "Preprocessing '%s' to collect template definitions: this may take some time.",
                template_file,
            )
            file = decode_open(template_file)
            templates = load_templates(file)
            file.close()
        else:
            if input_file == "-":
                # can't scan then reset stdin; must error w/ suggestion to specify template_file
                raise ValueError(
                    "to use templates with stdin dump, must supply explicit template-file"
                )
            logging.info(
                "Preprocessing '%s' to collect template definitions: this may take some time.",
                input_file,
            )
            templates = load_templates(input, template_file)
            input.close()
            input = decode_open(input_file)
        template_load_elapsed = default_timer() - template_load_start
        logging.info("Loaded %d templates in %.1fs", templates,
                     template_load_elapsed)

    if out_file == "-":
        output = sys.stdout
        if file_compress:
            logging.warn(
                "writing to stdout, so no output compression (use an external tool)"
            )
    else:
        nextFile = NextFile(out_file)
        output = OutputSplitter(nextFile, file_size, file_compress)

    # process pages
    logging.info("Starting page extraction from %s.", input_file)
    extract_start = default_timer()

    # Parallel Map/Reduce:
    # - pages to be processed are dispatched to workers
    # - a reduce process collects the results, sort them and print them.

    maxsize = 10 * process_count
    # output queue
    output_queue = Queue(maxsize=maxsize)

    # Reduce job that sorts and prints output
    reduce = Process(target=reduce_process, args=(output_queue, output))
    reduce.start()

    # initialize jobs queue
    jobs_queue = Queue(maxsize=maxsize)

    # start worker processes
    logging.info("Using %d extract processes.", process_count)
    workers = []
    for _ in range(max(1, process_count)):
        extractor = Process(target=extract_process,
                            args=(jobs_queue, output_queue, html_safe))
        extractor.daemon = True  # only live while parent process lives
        extractor.start()
        workers.append(extractor)

    # Mapper process

    # we collect individual lines, since str.join() is significantly faster
    # than concatenation
    page = []
    id = ""
    revid = ""
    last_id = ""
    ordinal = 0  # page count
    inText = False
    redirect = False
    for line in input:
        if "<" not in line:  # faster than doing re.search()
            if inText:
                page.append(line)
            continue
        m = tagRE.search(line)
        if not m:
            continue
        tag = m.group(2)
        if tag == "page":
            page = []
            redirect = False
        elif tag == "id" and not id:
            id = m.group(3)
        elif tag == "id" and id:  # <revision> <id></id> </revision>
            revid = m.group(3)
        elif tag == "title":
            title = m.group(3)
        elif tag == "redirect":
            redirect = True
        elif tag == "text":
            inText = True
            line = line[m.start(3):m.end(3)]
            page.append(line)
            if m.lastindex == 4:  # open-close
                inText = False
        elif tag == "/text":
            if m.group(1):
                page.append(m.group(1))
            inText = False
        elif inText:
            page.append(line)
        elif tag == "/page":
            colon = title.find(":")
            if (colon < 0 or (title[:colon] in acceptedNamespaces)
                    and id != last_id and not redirect
                    and not title.startswith(templateNamespace)):
                job = (id, revid, urlbase, title, page, ordinal)
                jobs_queue.put(job)  # goes to any available extract_process
                last_id = id
                ordinal += 1
            id = ""
            revid = ""
            page = []

    input.close()

    # signal termination
    for _ in workers:
        jobs_queue.put(None)
    # wait for workers to terminate
    for w in workers:
        w.join()

    # signal end of work to reduce process
    output_queue.put(None)
    # wait for it to finish
    reduce.join()

    if output != sys.stdout:
        output.close()
    extract_duration = default_timer() - extract_start
    extract_rate = ordinal / extract_duration
    logging.info(
        "Finished %d-process extraction of %d articles in %.1fs (%.1f art/s)",
        process_count,
        ordinal,
        extract_duration,
        extract_rate,
    )
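
# Note: a stripped-down sketch of the dispatcher/worker/reducer layout used in
# process_dump above, with toy integer jobs instead of wiki pages (all names
# here are illustrative, and the reduce step runs in the parent after the
# workers finish rather than in its own process):
from multiprocessing import Process, Queue

def square_worker(jobs, results):
    # consume jobs until the None sentinel arrives
    for job in iter(jobs.get, None):
        results.put(job * job)

if __name__ == "__main__":
    jobs, results = Queue(), Queue()
    workers = [Process(target=square_worker, args=(jobs, results)) for _ in range(4)]
    for w in workers:
        w.start()
    for i in range(20):
        jobs.put(i)
    for _ in workers:
        jobs.put(None)          # one termination sentinel per worker
    for w in workers:
        w.join()
    print(sorted(results.get() for _ in range(20)))
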
def sgd_optimization_mnist(
        learning_rate=0.013,
        n_epochs=100,
        dataset='D:\JupyterWorkspace\DeepLearningTutorial\mnist.pkl.gz',
        batch_size=50):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    '''
    datasets = load_data(dataset)
     
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    
    '''
    data = sio.loadmat(
        'D:\ResearchWork\Machine learning and MI\Code\MEMD-DL\subject_1_scheme8.mat'
    )
    processedEEG = data['processedEEG']
    processedERD = data['processedERD']
    processedSpectrum = data['processedSpectrum']
    trainLabels = data['trainLabels']
    trainLabels = trainLabels.T

    def extractbands(processedSpectrum, trainLabels):

        muband = processedSpectrum[:512, :, :]
        muband = muband[10:60, :, :]
        betaband = processedSpectrum[512:1024, :, :]
        betaband = betaband[29:100, :, :]

        totfeat = muband.shape[0] + betaband.shape[0]
        trainData = numpy.zeros((processedSpectrum.shape[2], totfeat * 2))
        labels = numpy.zeros((processedSpectrum.shape[2]))

        for i in range(
                processedSpectrum.shape[2]):  #(processedSpectrum.shape[2]
            trainData[i, :] = numpy.concatenate(
                (muband[:, 0, i], muband[:, 2, i], betaband[:, 0,
                                                            i], betaband[:, 2,
                                                                         i]))
            labels[i] = trainLabels[i]

        return trainData, labels

    trainData, labels = extractbands(processedSpectrum, trainLabels)

    indices = numpy.random.permutation(processedEEG.shape[2])
    training_idx, test_idx = indices[:350], indices[350:]
    train_set_x, test_set_x = trainData[training_idx, :], trainData[
        test_idx, :]
    train_set_y, test_set_y = labels[training_idx], labels[test_idx]
    valid_set_x = test_set_x
    valid_set_y = test_set_y

    test_set_x = theano.shared(numpy.asarray(test_set_x,
                                             dtype=theano.config.floatX),
                               borrow=True)
    test_set_y = theano.shared(numpy.asarray(test_set_y,
                                             dtype=theano.config.floatX),
                               borrow=True)
    test_set_y = T.cast(test_set_y, 'int32')

    train_set_x = theano.shared(numpy.asarray(train_set_x,
                                              dtype=theano.config.floatX),
                                borrow=True)
    train_set_y = theano.shared(numpy.asarray(train_set_y,
                                              dtype=theano.config.floatX),
                                borrow=True)

    train_set_y = T.cast(train_set_y, 'int32')

    valid_set_x = theano.shared(numpy.asarray(valid_set_x,
                                              dtype=theano.config.floatX),
                                borrow=True)
    valid_set_y = theano.shared(numpy.asarray(valid_set_y,
                                              dtype=theano.config.floatX),
                                borrow=True)
    valid_set_y = T.cast(valid_set_y, 'int32')

    # compute number of minibatches for training, validation and testing
    '''
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    
    '''
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = 1
    n_test_batches = 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=242, n_out=2)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' %
          (epoch, 1. * epoch / (end_time - start_time)))
Example #37
        plt.axvline(i, color='k', lw=1)
    for i in spk0[1].spike_times:
        plt.axvline(i, color='r', lw=1)
    ax.set_ylabel('$w$')

    # ax = fig.add_subplot(4, 2, 8)
    # ax.plot(wt[:, 1, 1], 'r', lw=3)
    # ax.set_ylabel('$w$')

    # fig = plt.figure(figsize=(10, 5))
    # for i in spk[0].spike_times:
    #    plt.plot(wt[:, 1, 1], 'r', lw=3)
    #    plt.axvline(i, color='k', lw=1)

    plt.savefig('/tmp/%s.png' %
                (os.path.splitext(os.path.basename(__file__))[0]))
    plt.close()
    print('End %s:run()' % (os.path.splitext(os.path.basename(__file__))[0]))


if __name__ == '__main__':
    print('Begin %s:main()' %
          (os.path.splitext(os.path.basename(__file__))[0]))
    start_t = timeit.default_timer()

    setup()
    run()

    print("End %s:main() , running time: %f seconds" % (os.path.splitext(
        os.path.basename(__file__))[0], timeit.default_timer() - start_t))
Example #38
filename_in = "trans-out.csv"
user_f = "users-large.csv"

# Read csv
in_read = csv.reader(open(filename_in,"rb"), delimiter=',',quoting=csv.QUOTE_ALL)
users_read = csv.reader(open(user_f,"rb"), delimiter=',',quoting=csv.QUOTE_ALL)
in_read.next()
users_read.next()

# Create user dict
# Optimization problem here if user list doesn't fit in memory
users_dict = {}
for user in users_read:
	users_dict[user[0]] = {"spending-limit" : user[1], "round-to" : user[2]}

start = timeit.default_timer()
for row in in_read:
	# row[0] == time
	# row[1] == account
	# row[2] == transaction number 
	# row[3] == amount

	# See if account number is our users
	if row[1] in users_dict:
		user_info = users_dict[row[1]]
		if float(row[3]) <= float(user_info["spending-limit"]):
			change = roundUp(float(row[3]), float(user_info["round-to"]))
			if change != "0.0":
				sendToBackend(row[0], change, row[2], row[1])
				print "REQUEST TRANS AMNT " + change + " FROM " + row[1] + " REFNUM: " + row[2]
			else:
Example #39
  def train(self,
            sentences,
            total_words=None,
            word_count=0,
            total_examples=None,
            queue_factor=2,
            report_delay=1.0):
    """ Update the model's neural weights from a sequence of sentences (can be a

        once-only generator stream).
        For Word2Vec, each sentence must be a list of unicode strings.
        (Subclasses may accept other examples.)

        To support linear learning-rate decay from (initial) alpha to min_alpha,
        either total_examples
        (count of sentences) or total_words (count of raw words in sentences)
        should be provided, unless the
        sentences are the same as those that were used to initially build the
        vocabulary.
    """
    logger.info("Starting training.")

    self.neg_labels = []
    if self.negative > 0:
      # precompute negative labels optimization for pure-python training
      self.neg_labels = zeros(self.negative + 1)
      self.neg_labels[0] = 1.

    if FAST_VERSION < 0:
      import warnings
      warnings.warn(
          "C extension not loaded for Word2Vec, training will be slow. "
          "Install a C compiler and reinstall gensim for fast training.")
      self.neg_labels = []
      if self.negative > 0:
        # precompute negative labels optimization for pure-python training
        self.neg_labels = zeros(self.negative + 1)
        self.neg_labels[0] = 1.

    logger.info(
        "training model with %i workers on %i vocabulary and %i features, "
        "using sg=%s hs=%s sample=%s negative=%s window=%s", self.workers,
        len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample,
        self.negative, self.window)

    if not self.vocab:
      raise RuntimeError(
          "you must first build vocabulary before training the model")
    if not hasattr(self, "syn0"):
      raise RuntimeError(
          "you must first finalize vocabulary before training the model")

    if total_words is None and total_examples is None:
      if self.corpus_count:
        total_examples = self.corpus_count
        logger.info(
            "expecting %i sentences, matching count from corpus used for vocabulary survey",
            total_examples)
      else:
        raise ValueError(
            "you must provide either total_words or total_examples, to enable alpha and progress calculations"
        )

    job_tally = 0

    if self.iter > 1:
      sentences = utils.RepeatCorpusNTimes(sentences, self.iter)
      total_words = total_words and total_words * self.iter
      total_examples = total_examples and total_examples * self.iter

    def worker_loop():
      """Train the model, lifting lists of sentences from the job_queue."""
      work = matutils.zeros_aligned(
          self.layer1_size, dtype=REAL)  # per-thread private work memory
      neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)
      jobs_processed = 0
      while True:
        job = job_queue.get()
        if job is None:
          progress_queue.put(None)
          break  # no more jobs => quit this worker
        sentences, pairwise, alpha = job
        tally, raw_tally = self._do_train_job(sentences, pairwise, alpha,
                                              (work, neu1))
        progress_queue.put(
            (len(sentences), tally, raw_tally))  # report back progress
        jobs_processed += 1
      logger.debug("worker exiting, processed %i jobs", jobs_processed)

    def job_producer():
      """Fill jobs queue using the input `sentences` iterator."""
      job_batch, batch_size = [], 0
      pushed_words, pushed_examples = 0, 0
      next_alpha = self.alpha
      if next_alpha > self.min_alpha_yet_reached:
        logger.warn("Effective 'alpha' higher than previous training cycles")
      self.min_alpha_yet_reached = next_alpha
      job_no = 0

      for sent_idx, sentence in enumerate(sentences):
        sentence_length = self._raw_word_count([sentence])

        # can we fit this sentence into the existing job batch?
        if batch_size + sentence_length <= self.batch_words:
          # yes => add it to the current job
          job_batch.append(sentence)
          batch_size += sentence_length
        else:
          # no => submit the existing job
          pair_idx = list(
              numpy.random.choice(
                  range(len(self.pairwise_constraints)), int(batch_size * 0.2)))
          pairwise_samples = [self.pairwise_constraints[x] for x in pair_idx]
          logger.debug(
              "queueing job #%i (%i words, %i sentences, %i constraints) at alpha %.05f",
              job_no, batch_size, len(job_batch), len(pairwise_samples),
              next_alpha)
          job_no += 1
          job_queue.put((job_batch, pairwise_samples, next_alpha))

          # update the learning rate for the next job
          if self.min_alpha < next_alpha:
            if total_examples:
              # examples-based decay
              pushed_examples += len(job_batch)
              progress = 1.0 * pushed_examples / total_examples
            else:
              # words-based decay
              pushed_words += self._raw_word_count(job_batch)
              progress = 1.0 * pushed_words / total_words
            next_alpha = self.alpha - (self.alpha - self.min_alpha) * progress
            next_alpha = max(self.min_alpha, next_alpha)

          # add the sentence that didn't fit as the first item of a new job
          job_batch, batch_size = [sentence], sentence_length

      # add the last job too (may be significantly smaller than batch_words)
      if job_batch:
        logger.debug(
            "queueing job #%i (%i words, %i sentences, %i constraints) at alpha %.05f",
            job_no, batch_size, len(job_batch), len(self.pairwise_constraints),
            next_alpha)
        job_no += 1
        job_queue.put((job_batch, self.pairwise_constraints, next_alpha))

      if job_no == 0 and self.train_count == 0:
        logger.warning(
            "train() called with an empty iterator (if not intended, "
            "be sure to provide a corpus that offers restartable "
            "iteration = an iterable).")

      # give the workers heads up that they can finish -- no more work!
      for _ in xrange(self.workers):
        job_queue.put(None)
      logger.debug("job loop exiting, total %i jobs", job_no)

    # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
    job_queue = Queue(maxsize=queue_factor * self.workers)
    progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)

    workers = [
        threading.Thread(target=worker_loop) for _ in xrange(self.workers)
    ]
    unfinished_worker_count = len(workers)
    workers.append(threading.Thread(target=job_producer))

    for thread in workers:
      thread.daemon = True  # make interrupting the process with ctrl+c easier
      thread.start()

    example_count, trained_word_count, raw_word_count = 0, 0, word_count
    start, next_report = default_timer() - 0.00001, 1.0

    while unfinished_worker_count > 0:
      report = progress_queue.get()  # blocks if workers too slow
      if report is None:  # a thread reporting that it finished
        unfinished_worker_count -= 1
        logger.info(
            "worker thread finished; awaiting finish of %i more threads",
            unfinished_worker_count)
        continue
      examples, trained_words, raw_words = report
      job_tally += 1

      # update progress stats
      example_count += examples
      trained_word_count += trained_words  # only words in vocab & sampled
      raw_word_count += raw_words

      # log progress once every report_delay seconds
      elapsed = default_timer() - start
      if elapsed >= next_report:
        if total_examples:
          # examples-based progress %
          logger.info(
              "PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i",
              100.0 * example_count / total_examples,
              trained_word_count / elapsed, utils.qsize(job_queue),
              utils.qsize(progress_queue))
        else:
          # words-based progress %
          logger.info(
              "PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i",
              100.0 * raw_word_count / total_words,
              trained_word_count / elapsed, utils.qsize(job_queue),
              utils.qsize(progress_queue))
        next_report = elapsed + report_delay

    # all done; report the final stats
    elapsed = default_timer() - start
    logger.info(
        "training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
        raw_word_count, trained_word_count, elapsed,
        trained_word_count / elapsed)
    if job_tally < 10 * self.workers:
      logger.warn(
          "under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay"
      )

    # check that the input corpus hasn't changed during iteration
    if total_examples and total_examples != example_count:
      logger.warn(
          "supplied example count (%i) did not equal expected count (%i)",
          example_count, total_examples)
    if total_words and total_words != raw_word_count:
      logger.warn(
          "supplied raw word count (%i) did not equal expected count (%i)",
          raw_word_count, total_words)

    self.train_count += 1  # number of times train() has been called
    self.total_train_time += elapsed
    self.clear_sims()
    return trained_word_count
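# A minimal standalone sketch (not part of the class above) of the linear
# learning-rate decay that job_producer applies between jobs: alpha falls
# linearly from the initial `alpha` to `min_alpha` as training progress
# (measured in examples or raw words) goes from 0 to 1. Names are illustrative.
def decayed_alpha(alpha, min_alpha, processed, expected):
  progress = min(1.0, float(processed) / expected)
  return max(min_alpha, alpha - (alpha - min_alpha) * progress)

# e.g. decayed_alpha(0.025, 0.0001, 5000, 10000) is roughly 0.0126 (halfway).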
Example #40
def main():
    """Parse command line options/arguments and execute."""
    try:
        arg_names = [
            "help", "version", "quick", "strict", "debug", "stop-tag=", "color"
        ]
        opts, args = getopt.getopt(sys.argv[1:], "hvqsdct:", arg_names)
    except getopt.GetoptError:
        usage(2)

    detailed = True
    stop_tag = DEFAULT_STOP_TAG
    debug = False
    strict = False
    color = False

    for option, arg in opts:
        if option in ("-h", "--help"):
            usage(0)
        if option in ("-v", "--version"):
            show_version()
        if option in ("-q", "--quick"):
            detailed = False
        if option in ("-t", "--stop-tag"):
            stop_tag = arg
        if option in ("-s", "--strict"):
            strict = True
        if option in ("-d", "--debug"):
            debug = True
        if option in ("-c", "--color"):
            color = True

    if not args:
        usage(2)

    exif_log.setup_logger(debug, color)

    # output info for each file
    for filename in args:
        file_start = timeit.default_timer()
        try:
            img_file = open(str(filename), 'rb')
        except IOError:
            logger.error("'%s' is unreadable", filename)
            continue
        logger.info("Opening: %s", filename)

        tag_start = timeit.default_timer()

        # get the tags
        data = process_file(img_file,
                            stop_tag=stop_tag,
                            details=detailed,
                            strict=strict,
                            debug=debug)

        tag_stop = timeit.default_timer()

        if not data:
            logger.warning("No EXIF information found\n")
            continue

        if 'JPEGThumbnail' in data:
            logger.info('File has JPEG thumbnail')
            del data['JPEGThumbnail']
        if 'TIFFThumbnail' in data:
            logger.info('File has TIFF thumbnail')
            del data['TIFFThumbnail']

        tag_keys = list(data.keys())
        tag_keys.sort()

        for i in tag_keys:
            try:
                logger.info('%s (%s): %s', i,
                            FIELD_TYPES[data[i].field_type][2],
                            data[i].printable)
            except Exception:
                logger.error("%s : %s", i, str(data[i]))

        file_stop = timeit.default_timer()

        logger.debug("Tags processed in %s seconds", tag_stop - tag_start)
        logger.debug("File processed in %s seconds", file_stop - file_start)
        print("")
                ds_model = ds_model.sel(nregions=cR)
            
#             # Take mean of ensemble
#             ds_model = ds_model.mean(dim='ensemble')

            # Get model plotting specs
            cc = E.model_color[cmod]
            cl = E.model_linestyle[cmod]

            # Plot Model
            if i == 0: # Control only one initialization label in legend
                no_init_label = False
            else:
                no_init_label = True
            import timeit
            start_time = timeit.default_timer()
            
            ice_plot.plot_reforecast(ds=ds_model, axin=ax1, 
                                 labelin=E.model[cmod]['model_label'],
                                 color=cc, marker=None,
                                 linestyle=cl,
                                 no_init_label=no_init_label)
            print( (timeit.default_timer() - start_time), ' seconds.' )

            # Memory clean up
            ds_model = None     

        cxlims = ax1.get_xlim()

       # add obs and climotrend
        if (cR==99):
def transposonmapper(bamfile=bam_arg,
                     gfffile=None,
                     essentialfiles=None,
                     genenamesfile=None):
    '''
    This function is created for analysis of SATAY data using the species Saccharomyces cerevisiae.
    It outputs the following files that store information regarding the location of all insertions:
        - .bed-file: Includes all individual basepair locations of the whole genome where at least one transposon has been mapped and the number of insertions for each location (the number of reads) according to the Browser Extensible Data (bed) format.
                    A distinction is made between reads that had a different reading orientation during sequencing. The number of reads is stored using the equation #reads*20+100 (e.g. 2 reads is stored as 140).
        - .wig-file: Includes all individual basepair locations of the whole genome where at least one transposon has been mapped and the number of insertions for each location (the number of reads) according to the Wiggle (wig) format.
                    In this file no distinction is made between reads that had a different reading orientation during sequencing. The number of reads is stored as the absolute count.
        - _pergene.txt-file: Includes all genes (currently 6600) with the total number of insertions and number of reads within the genomic region of the gene.
        - _peressential.txt-file: Includes all annotated essential genes (currently 1186) with the total number of insertions and number of reads within the genomic region of the gene.
        - _pergene_insertions.txt-file: Includes all genes with their genomic location (i.e. chromosome number, start and end position) and the locations of all insertions within the gene location. It also includes the number of reads per insertion.
        - _peressential_insertions.txt-file: Includes all essential genes with their genomic location (i.e. chromosome number, start and end position) and the locations of all insertions within the gene location. It also includes the number of reads per insertion.
          (note that in the latter two files, the genomic locations are continuous, for example chromosome II does not start at 0, but at 'length chromosome I + 1' etc.).
    The output files are saved at the location of the input file using the same name as the input file, but with the corresponding extension.

    The function assumes that the reads are already aligned to a reference genome.
    The input data should be a .bam-file and the location where the .bam-file is stored should also contain an index file (.bam.bai-file, which for example can be created using sambamba).
    This function takes the following inputs:
        - bamfile [required]: Path to the bamfile. This location should also contain the .bam.bai index file (does not need to be input in this function).
        - gfffile [optional]: Path to a .gff-file including all gene information (e.g. downloaded from SGD). Default file is 'Saccharomyces_cerevisiae.R64-1-1.99.gff3'.
        - essentialfiles [optional]: Path to a .txt file containing a list of all essential genes. Every line should consist of a single essential gene and the file should have one header line. Ideally this file is created using 'Create_EssentialGenes_list.py'. Default file is 'Cerevisiae_AllEssentialGenes_List.txt'.
        - genenamesfile [optional]: Path to a text file that includes aliases for all genes. Default file is 'Yeast_Protein_Names.txt'.
    When the arguments for the optional files are not given, the files stored at the following location are used:
        "path_current_pythonscript/../data_files"
    The function uses the pysam package for handling bam files (see pysam.readthedocs.io/en/latest/index.html) and therefore this function only runs on Linux systems with SAMTools installed.
    '''
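    # Worked example of the bed score encoding described above: a location
    # covered by 2 reads is written with score 2*20 + 100 = 140, so the read
    # count can be recovered as (score - 100) / 20.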

    #%% LOADING BAM FILE
    if bamfile is None:
        path = os.path.join('/home', 'gregoryvanbeek', 'Documents',
                            'data_processing')
        filename = 'SRR062634.filt_trimmed.sorted.bam'
        bamfile = os.path.join(path, filename)
    else:
        filename = os.path.basename(bamfile)
        path = bamfile.replace(filename, '')

    if os.path.isfile(bamfile):
        print('Running: ', bamfile)
    else:
        raise ValueError('Bam file not found at: ', bamfile)

#%% LOADING ADDITIONAL FILES
    files_path = os.path.join(dirname, '..', '..', 'data_files')

    #LOADING GFF-FILE
    if gfffile is None:
        gfffile = os.path.join(files_path,
                               'Saccharomyces_cerevisiae.R64-1-1.99.gff3')
    if not os.path.isfile(gfffile):
        raise ValueError('Path to GFF-file does not exist.')

    #LOADING TEXT FILES WITH ESSENTIAL GENES
    if essentialfiles is None:
        essentialfiles = os.path.join(files_path,
                                      'Cerevisiae_AllEssentialGenes_List.txt')
    if not os.path.isfile(essentialfiles):
        raise ValueError('Following path does not exist: ' + essentialfiles)
    del essentialfiles

    #LOADING TEXT FILE WITH GENE NAME ALIASES
    if genenamesfile is None:
        genenamesfile = os.path.join(files_path, 'Yeast_Protein_Names.txt')
    if not os.path.isfile(genenamesfile):
        raise ValueError('Following path does not exist: ' + genenamesfile)

#%% READ BAM FILE
    bam = pysam.AlignmentFile(bamfile,
                              'rb')  #open bam formatted file for reading

    #%% GET NAMES OF ALL CHROMOSOMES AS STORED IN THE BAM FILE
    ref_tid_dict = {}  # 'I' | 0, 'II' | 1, ...
    ref_name_list = []  # 'I', 'II', ...
    for i in range(
            bam.nreferences
    ):  #if bam.nreferences does not work, use range(17) #16 chromosomes and the mitochondrial chromosome
        ref_name = bam.get_reference_name(i)
        ref_tid_dict[ref_name] = bam.get_tid(ref_name)
        ref_name_list.append(ref_name)

    del (ref_name, i)

    #%% CONVERT CHROMOSOME NAMES IN DATA FILE TO ROMAN NUMERALS
    ref_romannums = chromosomename_roman_to_arabic()[0]
    ref_tid_roman_dict = {}
    for key, val in ref_tid_dict.items():
        ref_tid_roman_dict[ref_romannums[int(val) + 1]] = key

    del (key, val, ref_romannums)

    #%% GET SEQUENCE LENGTHS OF ALL CHROMOSOMES
    chr_length_dict = {}  # 'I' | 230218, 'II' | 813184, ...
    chr_summedlength_dict = {}  # 'I' | 0, 'II' | 230218, 'III' |  1043402, ...
    ref_summedlength = 0
    for key in ref_tid_dict:
        ref_length = bam.get_reference_length(key)
        chr_length_dict[key] = ref_length
        chr_summedlength_dict[key] = ref_summedlength
        ref_summedlength += ref_length

    del (key, ref_length, ref_summedlength)

    #%% GET NUMBER OF MAPPED, UNMAPPED AND TOTAL AMOUNT OF READS PER CHROMOSOME
    # total_reads = bam.mapped
    stats = bam.get_index_statistics()
    chr_mappedreads_dict = {}  # 'I' | [mapped, unmapped, total reads]
    for stat in stats:
        chr_mappedreads_dict[stat[0]] = [stat[1], stat[2], stat[3]]
        if stat[2] != 0:
            warnings.warn('Unmapped reads found in chromosome ' + stat[0])

    del (stat, stats)

    #%% GET ALL READS WITHIN A SPECIFIED GENOMIC REGION
    tnnumber_dict = {}
    ll = 0  #Number of unique insertions in entire genome
    for kk in ref_name_list:
        timer_start = timeit.default_timer()
        read_counter = 0

        N_reads_kk = chr_mappedreads_dict[kk][2]
        start_array = np.empty(shape=(N_reads_kk), dtype=int)
        flag_array = np.empty(shape=(N_reads_kk), dtype=int)
        readlength_array = np.empty(shape=(N_reads_kk), dtype=int)

        #RETRIEVING ALL THE READS FROM THE CURRENT CHROMOSOME.
        print('Getting reads for chromosome %s ...' % kk)
        for reads in bam.fetch(kk, 0, chr_length_dict[kk], until_eof=True):
            read = str(reads).split('\t')

            start_array[read_counter] = int(read[3]) + 1
            flag_array[read_counter] = int(read[1])
            readlength_array[read_counter] = int(len(read[9]))

            read_counter += 1

        #CORRECT STARTING POSITION FOR READS WITH REVERSED ORIENTATION
        flag0coor_array = np.where(
            flag_array == 0)  #coordinates reads 5' -> 3'
        flag16coor_array = np.where(
            flag_array == 16)  # coordinates reads 3' -> 5'

        startdirect_array = start_array[flag0coor_array]
        flagdirect_array = flag_array[flag0coor_array]

        startindirect_array = start_array[flag16coor_array] + readlength_array[
            flag16coor_array]
        flagindirect_array = flag_array[flag16coor_array]

        start2_array = np.concatenate((startdirect_array, startindirect_array),
                                      axis=0)
        flag2_array = np.concatenate((flagdirect_array, flagindirect_array),
                                     axis=0)

        del (flag0coor_array, flag16coor_array, startdirect_array,
             flagdirect_array, startindirect_array, flagindirect_array)

        start2_sortindices = start2_array.argsort(
            kind='mergesort')  #use mergesort for stable sorting
        start2_array = start2_array[start2_sortindices]
        flag2_array = flag2_array[start2_sortindices]

        del start2_sortindices

        #CREATE ARRAY OF START POSITION AND FLAGS OF ALL READS IN GENOME
        ref_tid_kk = int(ref_tid_dict[kk] + 1)
        if ll == 0:
            tncoordinates_array = np.array([])

        mm = 0  # Number of additional reads merged into the current insertion (stored as mm + 1)
        jj = 1  # Number of unique insertions (transposons) in the current chromosome
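        # Merge reads into insertions: consecutive (position-sorted) reads whose
        # corrected start positions lie within 2 bp of each other and that share
        # the same flag (orientation) are collapsed into one insertion, stored at
        # the rounded mean of the merged start positions with read count mm + 1.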
        for ii in range(1, len(start2_array)):
            if abs(
                    start2_array[ii] - start2_array[ii - 1]
            ) <= 2 and flag2_array[ii] == flag2_array[
                    ii -
                    1]:  #If two subsequent reads are within two basepairs and have the same orientation, add them together.
                mm += 1
            else:
                avg_start_pos = abs(
                    round(np.mean(start2_array[ii - mm - 1:ii])))
                if tncoordinates_array.size == 0:  #include first read
                    tncoordinates_array = np.array([
                        ref_tid_kk,
                        int(avg_start_pos),
                        int(flag2_array[ii - 1])
                    ])
                    readnumb_list = [mm + 1]
                else:
                    tncoordinates_array = np.vstack((tncoordinates_array, [
                        ref_tid_kk,
                        int(avg_start_pos),
                        int(flag2_array[ii - 1])
                    ]))
                    readnumb_list.append(mm + 1)
                mm = 0
                jj += 1
                ll += 1

            if ii == len(start2_array) - 1:  #include last read
                avg_start_pos = abs(
                    round(np.mean(start2_array[ii - mm - 1:ii])))
                tncoordinates_array = np.vstack((tncoordinates_array, [
                    ref_tid_kk,
                    int(avg_start_pos),
                    int(flag2_array[ii - 1])
                ]))
                readnumb_list.append(mm + 1)

        tnnumber_dict[kk] = jj

        del (jj, start_array, flag_array, readlength_array, flag2_array,
             start2_array, ref_tid_kk)

        timer_end = timeit.default_timer()
        print('Chromosome %s completed in %.3f seconds' %
              (kk, (timer_end - timer_start)))
        print('')

    readnumb_array = np.array(readnumb_list)
    del readnumb_list

    tncoordinatescopy_array = np.array(tncoordinates_array, copy=True)

    #%% GET LIST OF ALL GENES AND ALL ESSENTIAL GENES
    print('Getting coordinates of all genes ...')

    # GET POSITION GENES
    gff_path = os.path.join(files_path,
                            'Saccharomyces_cerevisiae.R64-1-1.99.gff3')
    genecoordinates_dict = gene_position(
        gff_path)  #'YAL069W' | ['I', 335, 649], ...

    # GET ALL ANNOTATED ESSENTIAL GENES
    essential_path = os.path.join(files_path,
                                  'Cerevisiae_AllEssentialGenes_List.txt')
    essentialcoordinates_dict = {}
    with open(essential_path, 'r') as f:
        genes = f.readlines()[1:]
        for gene in genes:
            name = gene.strip('\n')
            essentialcoordinates_dict[name] = genecoordinates_dict.get(
                name).copy()

    # GET ALIASES OF ALL GENES
    names_path = os.path.join(files_path, 'Yeast_Protein_Names.txt')
    aliases_designation_dict = gene_aliases(names_path)[
        0]  #'YMR056C' \ ['AAC1'], ...

    del (gff_path, gene, genes, name, essential_path)

    #%% CONCATENATE ALL CHROMOSOMES

    #FOR EACH INSERTION LOCATION, ADD THE LENGTH OF ALL PREVIOUS CHROMOSOMES.
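    # e.g. with the summed lengths computed earlier, an insertion at position 500
    # on chromosome II is stored at 230218 + 500 = 230718 in genome-wide coordinates.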
    ll = 0
    for ii in range(1, len(ref_name_list)):
        ll += chr_length_dict[ref_name_list[ii - 1]]
        aa = np.where(tncoordinatescopy_array[:, 0] == ii + 1)
        tncoordinatescopy_array[aa, 1] = tncoordinatescopy_array[aa, 1] + ll

    #FOR EACH GENE LOCATION, ADD THE LENGTH OF ALL PREVIOUS CHROMOSOMES.
    for key in genecoordinates_dict:
        gene_chrom = ref_tid_roman_dict.get(genecoordinates_dict.get(key)[0])
        genecoordinates_dict[key][1] = genecoordinates_dict.get(
            key)[1] + chr_summedlength_dict.get(gene_chrom)
        genecoordinates_dict[key][2] = genecoordinates_dict.get(
            key)[2] + chr_summedlength_dict.get(gene_chrom)

    #FOR EACH ESSENTIAL GENE LOCATION, ADD THE LENGTH OF ALL PREVIOUS CHROMOSOMES.
    for key in essentialcoordinates_dict:
        gene_chrom = ref_tid_roman_dict.get(
            essentialcoordinates_dict.get(key)[0])
        essentialcoordinates_dict[key][1] = essentialcoordinates_dict.get(
            key)[1] + chr_summedlength_dict.get(gene_chrom)
        essentialcoordinates_dict[key][2] = essentialcoordinates_dict.get(
            key)[2] + chr_summedlength_dict.get(gene_chrom)

    del (ii, ll, aa, key, gene_chrom)

    #%% GET NUMBER OF TRANSPOSONS AND READS PER GENE
    print('Get number of insertions and reads per gene ...')

    #ALL GENES
    tnpergene_dict = {}
    readpergene_dict = {}
    tncoordinates_pergene_dict = {}
    # readpergenecrude_dict = {}
    for gene in genecoordinates_dict:
        xx = np.where(
            np.logical_and(
                tncoordinatescopy_array[:, 1] >=
                genecoordinates_dict.get(gene)[1],
                tncoordinatescopy_array[:, 1] <= genecoordinates_dict.get(gene)
                [2]))  #get all insertions within range of current gene
        tnpergene_dict[gene] = np.size(xx)
        readpergene_dict[gene] = sum(readnumb_array[xx]) - max(
            readnumb_array[xx],
            default=0)  #REMOVE LARGEST VALUE TO REDUCE NOISE
        # readpergenecrude_dict[gene] = sum(readnumb_array[xx])

        if np.size(xx) > 0:
            tncoordinates_pergene_dict[gene] = [
                genecoordinates_dict.get(gene)[0],
                genecoordinates_dict.get(gene)[1],
                genecoordinates_dict.get(gene)[2],
                list(tncoordinatescopy_array[xx[0][0]:xx[0][-1] + 1, 1]),
                list(readnumb_array[xx])
            ]
        else:
            tncoordinates_pergene_dict[gene] = [
                genecoordinates_dict.get(gene)[0],
                genecoordinates_dict.get(gene)[1],
                genecoordinates_dict.get(gene)[2], [], []
            ]

    #ONLY ESSENTIAL GENES
    tnperessential_dict = {}
    readperessential_dict = {}
    tncoordinates_peressential_dict = {}
    # readperessentialcrude_dict = {}
    for gene in essentialcoordinates_dict:
        xx = np.where(
            np.logical_and(
                tncoordinatescopy_array[:, 1] >=
                essentialcoordinates_dict.get(gene)[1],
                tncoordinatescopy_array[:, 1] <=
                essentialcoordinates_dict.get(gene)[2]))
        tnperessential_dict[gene] = np.size(xx)
        readperessential_dict[gene] = sum(readnumb_array[xx]) - max(
            readnumb_array[xx], default=0)
        # readperessentialcrude_dict[gene] = sum(readnumb_array[xx])

        if np.size(xx) > 0:
            tncoordinates_peressential_dict[gene] = [
                essentialcoordinates_dict.get(gene)[0],
                essentialcoordinates_dict.get(gene)[1],
                essentialcoordinates_dict.get(gene)[2],
                list(tncoordinatescopy_array[xx[0][0]:xx[0][-1] + 1, 1]),
                list(readnumb_array[xx])
            ]
        else:
            tncoordinates_peressential_dict[gene] = [
                essentialcoordinates_dict.get(gene)[0],
                essentialcoordinates_dict.get(gene)[1],
                essentialcoordinates_dict.get(gene)[2], [], []
            ]

    del (xx, gene)

    #%% CREATE BED FILE
    bedfile = bamfile + '.bed'
    print('Writing bed file at: ', bedfile)
    print('')

    with open(bedfile, 'w') as f:

        f.write('track name=' + filename + ' useScore=1\n')

        coordinates_counter = 0
        for tn in tncoordinates_array:
            refname = [
                key for key, val in ref_tid_dict.items() if val == tn[0] - 1
            ][0]
            if refname == 'Mito':
                refname = 'M'
            f.write('chr' + refname + ' ' + str(tn[1]) + ' ' + str(tn[1] + 1) +
                    ' . ' +
                    str(100 + readnumb_array[coordinates_counter] * 20) + '\n')
            coordinates_counter += 1

    del (bedfile, coordinates_counter, refname)

    #%% CREATE TEXT FILE WITH TRANSPOSONS AND READS PER GENE
    pergenefile = bamfile + '_pergene.txt'
    print('Writing pergene.txt file at: ', pergenefile)
    print('')

    with open(pergenefile, 'w') as f:

        f.write(
            'Gene name\tNumber of transposons per gene\tNumber of reads per gene\n'
        )

        for gene in tnpergene_dict:
            tnpergene = tnpergene_dict[gene]
            readpergene = readpergene_dict[gene]
            if gene in aliases_designation_dict:
                gene_alias = aliases_designation_dict.get(gene)[0]
            else:
                gene_alias = gene
            f.write(gene_alias + '\t' + str(tnpergene) + '\t' +
                    str(readpergene) + '\n')

    del (pergenefile, gene, gene_alias, tnpergene, readpergene)

    #%% CREATE TEXT FILE TRANSPOSONS AND READS PER ESSENTIAL GENE
    peressentialfile = bamfile + '_peressential.txt'
    print('Writing peressential.txt file at: ', peressentialfile)
    print('')

    with open(peressentialfile, 'w') as f:

        f.write(
            'Gene name\tNumber of transposons per gene\tNumber of reads per gene\n'
        )

        for essential in tnperessential_dict:
            tnperessential = tnperessential_dict[essential]
            readperessential = readperessential_dict[essential]
            if essential in aliases_designation_dict:
                essential_alias = aliases_designation_dict.get(essential)[0]
            else:
                essential_alias = essential
            f.write(essential_alias + '\t' + str(tnperessential) + '\t' +
                    str(readperessential) + '\n')

    del (peressentialfile, essential, essential_alias, tnperessential,
         readperessential)

    #%% CREATE TEXT FILE WITH LOCATION OF INSERTIONS AND READS PER GENE
    pergeneinsertionsfile = bamfile + '_pergene_insertions.txt'
    print('Writing pergene_insertions.txt file at: ', pergeneinsertionsfile)
    print('')

    with open(pergeneinsertionsfile, 'w') as f:

        f.write(
            'Gene name\tChromosome\tStart location\tEnd location\tInsertion locations\tReads per insertion location\n'
        )

        for gene in tncoordinates_pergene_dict:
            gene_chrom = ref_tid_roman_dict.get(
                genecoordinates_dict.get(gene)[0])
            tncoordinates = [
                ins - chr_summedlength_dict.get(gene_chrom)
                for ins in tncoordinates_pergene_dict[gene][3]
            ]

            if gene in aliases_designation_dict:
                gene_alias = aliases_designation_dict.get(gene)[0]
            else:
                gene_alias = gene

            f.write(gene_alias + '\t' +
                    str(tncoordinates_pergene_dict[gene][0]) + '\t' +
                    str(tncoordinates_pergene_dict[gene][1] -
                        chr_summedlength_dict.get(gene_chrom)) + '\t' +
                    str(tncoordinates_pergene_dict[gene][2] -
                        chr_summedlength_dict.get(gene_chrom)) + '\t' +
                    str(tncoordinates) + '\t' +
                    str(tncoordinates_pergene_dict[gene][4]) + '\n')

    del (gene, gene_chrom, tncoordinates, gene_alias, pergeneinsertionsfile)

    #%% CREATE TEXT FILE WITH LOCATION OF INSERTIONS AND READS PER ESSENTIAL GENE
    peressentialinsertionsfile = bamfile + '_peressential_insertions.txt'
    print('Writing peressential_insertions.txt file at: ',
          peressentialinsertionsfile)
    print('')

    with open(peressentialinsertionsfile, 'w') as f:

        f.write(
            'Essential gene name\tChromosome\tStart location\tEnd location\tInsertion locations\tReads per insertion location\n'
        )

        for essential in tncoordinates_peressential_dict:
            gene_chrom = ref_tid_roman_dict.get(
                genecoordinates_dict.get(essential)[0])
            tncoordinates = [
                ins - chr_summedlength_dict.get(gene_chrom)
                for ins in tncoordinates_peressential_dict[essential][3]
            ]

            if essential in aliases_designation_dict:
                essential_alias = aliases_designation_dict.get(essential)[0]
            else:
                essential_alias = essential

            f.write(essential_alias + '\t' +
                    str(tncoordinates_peressential_dict[essential][0]) + '\t' +
                    str(tncoordinates_peressential_dict[essential][1] -
                        chr_summedlength_dict.get(gene_chrom)) + '\t' +
                    str(tncoordinates_peressential_dict[essential][2] -
                        chr_summedlength_dict.get(gene_chrom)) + '\t' +
                    str(tncoordinates) + '\t' +
                    str(tncoordinates_peressential_dict[essential][4]) + '\n')

    del (essential, gene_chrom, tncoordinates, essential_alias,
         peressentialinsertionsfile)

    #%% ADD INSERTIONS AT SAME LOCATION BUT WITH DIFFERENT ORIENTATIONS TOGETHER (FOR STORING IN WIG-FILE)
    wigfile = bamfile + '.wig'
    print('Writing wig file at: ', wigfile)
    print('')

    readnumbwig_array = readnumb_array.copy()

    unique_index_array = np.array([], dtype=int)  # genome-wide row indices of the first occurrence of each unique insertion position
    N_uniques_perchr_list = []
    ll = 0
    for kk in ref_name_list:
        index = np.where(tncoordinates_array[:, 0] == int(
            ref_tid_dict[kk] + 1))  #get indices for current chromosome.
        unique_index = np.unique(
            tncoordinates_array[index][:, 1], return_index=True
        )[1]  #get all insertion locations (in tncoordinates, all rows, column 1)

        unique_index_array = np.append(unique_index_array, (unique_index + ll),
                                       axis=0)

        ll += np.count_nonzero(tncoordinates_array[:,
                                                   0] == int(ref_tid_dict[kk] +
                                                             1))
        N_uniques_perchr_list.append(
            ll)  #cumulative number of insertions up to and including the current chromosome

    del (ll, kk, unique_index)

    duplicate_list = []  # row indices of insertions whose position was already seen in the same chromosome (same location, other orientation)
    ll = 0
    index_last_unique_previous_chromosome = 0
    for ii in N_uniques_perchr_list:
        index_last_unique = np.where(unique_index_array <= ii)[0][-1]
        for jj in range(ll, ii):
            if int(jj) not in unique_index_array[
                    index_last_unique_previous_chromosome:index_last_unique]:
                duplicate_list.append(jj)
        index_last_unique_previous_chromosome = index_last_unique
        ll = ii

    #SUM READNUMB VALUES AT INDEX IN DUPLICATE_LIST AND DUPLICATE_LIST-1
    for ii in duplicate_list:
        readnumbwig_array[ii -
                          1] = readnumbwig_array[ii -
                                                 1] + readnumbwig_array[ii]

    tncoordinateswig_duplicatesremoved_array = np.delete(tncoordinates_array,
                                                         duplicate_list,
                                                         axis=0)
    readnumbwig_duplicatesremoved_array = np.delete(readnumbwig_array,
                                                    duplicate_list,
                                                    axis=0)

    del (ll, ii, jj, N_uniques_perchr_list, index_last_unique, duplicate_list,
         readnumbwig_array)

    #%% CREATING WIG FILE
    with open(wigfile, 'w') as f:
        f.write('track type=wiggle_0 ,maxheightPixels=60 name=' + filename +
                '\n')
        for kk in ref_name_list:
            f.write('VariableStep chrom=chr' + kk + '\n')

            index = np.where(tncoordinateswig_duplicatesremoved_array[:, 0] ==
                             int(ref_tid_dict[kk] +
                                 1))  #get indices for current chromosome.
            for ii in index[0]:
                f.write(
                    str(tncoordinateswig_duplicatesremoved_array[ii][1]) +
                    ' ' + str(readnumbwig_duplicatesremoved_array[ii]) + '\n')

    del (wigfile, kk, ii, index)
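# A minimal sketch (separate from the function above) of reading back the
# _pergene.txt file it writes: one header line, then tab-separated gene name,
# transposon count and read count per line. The function name is illustrative.
def read_pergene_file(path):
    per_gene = {}
    with open(path) as f:
        next(f)  # skip the header line
        for line in f:
            gene, tn_count, read_count = line.rstrip('\n').split('\t')
            per_gene[gene] = (int(tn_count), int(read_count))
    return per_gene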
Example #43
# early-stopping parameters
patience = 10000  # look at this many examples regardless
patience_increase = 2  # wait this much longer when a new best is found
improvement_threshold = 0.995  # a relative improvement of this much is
                               # considered significant
validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()

epoch = 0
done_looping = False

while (epoch < n_epochs) and (not done_looping):
    epoch = epoch + 1
    for minibatch_index in xrange(n_train_batches):

        iter = (epoch - 1) * n_train_batches + minibatch_index

        if iter % 100 == 0:
            print 'training @ iter = ', iter
        cost_ij = train_model(minibatch_index)

        if (iter + 1) % validation_frequency == 0:
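        # The snippet is cut off at the validation check. In the standard
        # early-stopping pattern these parameters set up, the loop typically
        # continues along these lines (validate_model and n_valid_batches are
        # assumed, not shown in the original):
        #
        #   if (iter + 1) % validation_frequency == 0:
        #       this_validation_loss = numpy.mean(
        #           [validate_model(i) for i in xrange(n_valid_batches)])
        #       if this_validation_loss < best_validation_loss:
        #           # widen patience only when the improvement is significant
        #           if this_validation_loss < best_validation_loss * improvement_threshold:
        #               patience = max(patience, iter * patience_increase)
        #           best_validation_loss = this_validation_loss
        #           best_iter = iter
        #
        #   if patience <= iter:
        #       done_looping = True
        #       break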
Example #44
def main():

    time_start = timeit.default_timer()

    #global PATH

    df = pd.read_csv("/home/dummy/try/SesOutFinalUS.tsv",
                     dtype=object,
                     header=None,
                     delimiter="\t",
                     error_bad_lines=False)

    #df1 = df.head(200)

    #df1.to_csv(PATH + "/outputs/sampleOut1.tsv", index = False, header= None, sep='\t')

    print 'file read'
    #['U','SES', 'DUR', 'SC','C','URL','D','T']
    df.columns = [0, 1, 2, 8, 9, 3, 4, 5]

    users = {}
    curr_user = df.iloc[0, 0]
    curr_session = df.iloc[0, 1]
    url_list = []

    session_list = []
    tier1 = 0
    tier2 = 0
    tier3 = 0
    tier4 = 0
    url_book = 'https://secure.celebritycruises.com/booking/paymentConfirmation'
    url_held = 'https://secure.celebritycruises.com/booking/courtesyHoldConfirmation'
    visits = [[0 for i in range(5)] for j in range(4)]  #lists of lists
    duration_list = [[0 for i in range(5)] for j in range(4)]
    day_diff = [[0 for i in range(4)] for j in range(4)]
    div_dur = [[0 for i in range(5)] for j in range(4)]
    duration = get_sec(df.loc[0, 2])
    temp = True
    count = 0
    c = 0
    session_f = [0, 0, 0, 0]

    sess_duration = []

    for i in range(1, len(df.index)):

        if df[0][i] == curr_user:
            if df[3][i] not in url_list:
                url_list.append(df[3][i])

        else:
            count += 1
            if url_book in url_list or url_held in url_list:
                users[curr_user] = Node(3, df[1][i - 1])  #3 is index of tier4
                tiers = 3
            else:
                if int(df[1][i - 1]) == 1 and len(set(url_list)) in [0, 1]:
                    tiers = 0
                    c += 1

                elif int(df[1][i - 1]) == 1 and len(set(url_list)) >= 2:
                    tiers = 1  #index for tier2
                else:
                    tiers = 2  #index for tier3

                users[curr_user] = Node(tiers, df[1][i - 1])

            session_f[tiers] += int(df[1][i - 1])

            curr_user = df[0][i]

            del url_list[:]
            url_list.append(df[3][i])

    print 'Total number of Users are ' + str(count)

    fivePlus = [0, 0, 0, 0]
    foTofi = [0, 0, 0, 0]

    for node in users.values():
        if node.sessions >= 5:
            index = 4
            fivePlus[node.tier] += (node.sessions - 4)
            foTofi[node.tier] += 1
        else:
            index = node.sessions - 1

        if node.tier == 0:
            tier1 += 1
            visits[0][index] += 1
        elif node.tier == 1:
            tier2 += 1
            visits[1][index] += 1
        elif node.tier == 2:
            tier3 += 1
            visits[2][index] += 1
        else:
            tier4 += 1
            visits[3][index] += 1

    print fivePlus

    for i in range(4):
        for j in range(5):
            div_dur[i][j] = doSum(i, j, visits)

    curr_session = df[1][0]
    temp = int(curr_session) - 1
    curr_user = df[0][0]
    usr_page = df[3][0]
    r = users[curr_user].tier
    duration_list[r][int(curr_session)] += get_sec(df[2][0])

    for index, user in enumerate(df[0][1:len(df.index)], start=1):

        if user != curr_user:
            duration_list[users[curr_user].tier][temp] += get_sec(df[2][index -
                                                                        1])

            curr_user = user
            curr_session = df[1][index]
            temp = getTemp(int(curr_session))
            #usr_page = df[3][index]

        elif df[1][index] != curr_session:

            if temp != 4:

                sec = get_sec(df[5][index].strip(' GMT')) - get_sec(
                    df[5][index - 1].strip(' GMT'))

                l1 = map(int, df.iloc[index, 6].split('-'))
                l2 = map(int, df.iloc[index - 1, 6].split('-'))

                d1 = date(l1[0], l1[1], l1[2])
                d2 = date(l2[0], l2[1], l2[2])

                d = (d1 - d2).days

                if sec < 0:
                    d = d - 1

                day_diff[users[curr_user].tier][temp] += d

            duration_list[users[curr_user].tier][temp] += get_sec(df[2][index -
                                                                        1])
            curr_session = df[1][index]
            temp = getTemp(int(curr_session))

        else:
            pass

    for i in range(4):
        for j in range(5):
            if div_dur[i][j] == 0:
                duration_list[i][j] = 0
            else:
                if j == 4 and fivePlus[i] != 0:
                    temp = fivePlus[i]
                else:
                    temp = div_dur[i][j]

                duration_list[i][j] = str(
                    datetime.timedelta(seconds=int(duration_list[i][j] /
                                                   temp)))

    for i in range(4):
        for j in range(4):
            if div_dur[i][j] == 0:
                day_diff[i][j] = 0
            else:
                if j == 3 and foTofi[i] != 0:
                    temp = foTofi[i]
                else:
                    temp = div_dur[i][j]

                day_diff[i][j] = day_diff[i][j] / temp

    print tier1, tier2, tier3, tier4

    print '\n'

    print visits[0]
    print visits[1]
    print visits[2]
    print visits[3]

    print '\n'

    print div_dur[0]
    print div_dur[1]
    print div_dur[2]
    print div_dur[3]

    print '\n'

    print session_f[0]
    print session_f[1]
    print session_f[2]
    print session_f[3]

    print '\n'

    print duration_list[0]
    print duration_list[1]
    print duration_list[2]
    print duration_list[3]

    print '\n'

    print day_diff[0]
    print day_diff[1]
    print day_diff[2]
    print day_diff[3]

    print '\n'

    print tier1 * 100 / (tier1 + tier2 + tier3 + tier4)

    time_stop = timeit.default_timer()

    print time_stop - time_start
Example #45
    def tearDown(self):
        '''after each test function'''
        pass

    def do(self, func):
        '''todo'''
        self.assertEqual(func("hello", "ll"), 2)
        self.assertEqual(func("aaaaa", "bba"), -1)
        self.assertEqual(func("aaaac", "aac"), 2)
        pass

    def test_func(self):
        self.do(s.strStr)

        self.assertEqual(s.make_prefix("abc"), [-1, 0, 0])
        self.assertEqual(s.make_prefix("aac"), [-1, 0, 1])
        self.assertEqual(s.make_prefix("ababc"), [-1, 0, 0, 1, 2])

if __name__ == "__main__":
    count = 100000
    t = "ababcabcde"
    p = "abcd"
    utils.print_func_run_time(count, s.strStr, t = t, p = p)
    b = timeit.default_timer()
    for i in range(count):
        t.index(p)
    print(timeit.default_timer() - b)
    unittest.main()


Example #46
    _save_log_ = False
    if _save_log_:
        from datetime import datetime
        from std_logger import StdFileLoggerCtrl

        # save all console activity to out_log_file
        out_log_file = os.path.join(
            r'P:\Synchronize\python_script_logs\\%s_log_%s.log' %
            (os.path.basename(__file__),
             datetime.now().strftime('%Y%m%d%H%M%S')))

        log_link = StdFileLoggerCtrl(out_log_file)

    print('#### Started on %s ####\n' % time.asctime())
    START = timeit.default_timer()

    #==========================================================================
    # When in post_mortem:
    # 1. "where" to show the stack
    # 2. "up" move the stack up to an older frame
    # 3. "down" move the stack down to a newer frame
    # 4. "interact" start an interactive interpreter
    #==========================================================================

    if DEBUG_FLAG:
        try:
            main()

        except:
            import pdb
Example #47
    performance = get_current_performance(
        np.zeros(int(num_examples / update_interval)), 0)

# set firing rates to zero initially
for name in input_population_names:
    input_groups[name + 'e'].rate = 0

# initialize network
j = 0
num_retries = 0
b.run(0)

weights_name = 'XeAe' + '_' + ending

# start recording time
start_time = timeit.default_timer()

while j < num_examples:
    # fetched rates depend on training / test phase, and whether we use the
    # testing dataset for the test phase
    if test_mode:
        if use_testing_set:
            rates = testing['x'][j % 10000, :, :] / 8. * input_intensity
        else:
            rates = training['x'][j % 60000, :, :] / 8. * input_intensity

    else:
        # ensure weights don't grow without bound
        normalize_weights()
        # get the firing rates of the next input example
        rates = training['x'][j % 60000, :, :] / 8. * input_intensity
Example #48
from utils import *

cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)

test_dir = 'test/images'
pred_folder = 'pred154_loc'
models_folder = 'weights'

all_files = np.array(get_files())
val_idxs = train_test_split(np.arange(len(all_files)).astype(int), test_size=0.1, random_state=0)[1]
all_files = all_files[val_idxs]

if __name__ == '__main__':
    t0 = timeit.default_timer()

    makedirs(pred_folder, exist_ok=True)
    
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]

    # cudnn.benchmark = True

    models = []

    for seed in [0]:
        snap_to_load = 'se154_loc_{}_1_best'.format(seed)
        model = SeNet154_Unet_Loc().cuda()
        model = nn.DataParallel(model).cuda()
        print("=> loading checkpoint '{}'".format(snap_to_load))
Example #49
x['id6'] = x['id6'].astype('category')
small = fread(src_jn_y[0]).to_pandas()
small['id4'] = small['id4'].astype('category')
medium = fread(src_jn_y[1]).to_pandas()
medium['id4'] = medium['id4'].astype('category')
medium['id5'] = medium['id5'].astype('category')
big = fread(src_jn_y[2]).to_pandas()
big['id4'] = big['id4'].astype('category')
big['id5'] = big['id5'].astype('category')
big['id6'] = big['id6'].astype('category')
print(len(x.index), flush=True)
print(len(small.index), flush=True)
print(len(medium.index), flush=True)
print(len(big.index), flush=True)

task_init = timeit.default_timer()
print("joining...", flush=True)

question = "small inner on int" # q1
gc.collect()
t_start = timeit.default_timer()
ans = x.merge(small, on='id1')
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
t_start = timeit.default_timer()
chk = [ans['v1'].sum(), ans['v2'].sum()]
chkt = timeit.default_timer() - t_start
write_log(task=task, data=data_name, in_rows=x.shape[0], question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk)
del ans
gc.collect()
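# The benchmark above repeats the same pattern for every question: take
# timeit.default_timer() before and after each step. A small generic sketch
# of that pattern as a context manager (not used by the benchmark itself):
import timeit
from contextlib import contextmanager

@contextmanager
def timed(label):
    start = timeit.default_timer()
    yield
    print("%s took %.3f s" % (label, timeit.default_timer() - start), flush=True)

# usage: with timed("small inner on int"): ans = x.merge(small, on='id1')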
def main(arguments,output_filename):
    ''' Parse user input, query SQL database, generate pandas dataframes, export JSON for D3 and print HTML code '''

    ######################################################
    ### start the alert div that contains any output generated here
    ######################################################
    algorithm_output_str = ''

    timing = {} 
    start_all = timeit.default_timer()
    
    ######################################################
    # generate an input overview table
    ######################################################
    arg_names = ['Genes','Cluster by','Color by','Interaction type',\
                'Minimal number of experiments','Minimal number of publications', 'Minimal number of methods','Method types',\
                'Process','Compartment','Expression','Max. number nodes','Filter condition']
    input_dict = { arg_names[i]:arguments[i].replace("_"," ") for i in range(len(arg_names)) } # does not include unique_str and excel_flag
    input_dict['Expression'] = input_dict['Expression'].replace('G1P','G1(P)') # brackets removed in PHP

    df_user_input = pd.DataFrame.from_dict(input_dict,orient='index')
    df_user_input = df_user_input.reindex(index = arg_names)
    df_user_input.columns = ['user input']
    df_user_input_to_print = df_user_input.to_html(classes=['table','table-condensed','table-bordered'])

    ### process arguments 
    primary_nodes,cluster_by,color_by,int_type,\
    min_exp,min_pub,min_methods,method_types,\
    process,compartment,expression,\
    max_nodes,filter_condition,\
    excel_flag,filter_flag,unique_str = arguments

    # make sure types are correct
    color_by = color_by.replace('_',' ')
    cluster_by = cluster_by.replace('_',' ')
    filter_condition = filter_condition.replace('_',' ')

    process = process.split(',')
    method_types = method_types.split(',')
    method_types = [x.replace('_',' ') for x in method_types]
    expression = expression.split(',')
    if 'G1P' in expression: # brackets removed in php
      ind = expression.index('G1P')
      expression[ind] = 'G1(P)'

    process = [x.replace("_"," ") for x in process]

    primary_nodes_str = primary_nodes
    if '_' in primary_nodes:
      primary_nodes = primary_nodes.split('_')  
    else:
      primary_nodes = [primary_nodes]

    min_exp = int(min_exp)
    min_pub = int(min_pub)
    min_methods = int(min_methods)
    max_nodes = int(max_nodes)
    excel_flag = bool(int(excel_flag))
    filter_flag = bool(int(filter_flag))

    split_types = int_type.split(',')

    compartment = compartment.replace('_',' ')

    timing['input'] = timeit.default_timer() - start_all


    if excel_flag:
      ######################################################
      # WRITE TO EXCEL
      ######################################################
      # THIS HAS TO HAPPEN BEFORE HTML REPLACEMENTS
      start_excel = timeit.default_timer()

      write_excel_file(primary_nodes_str+'_'+unique_str)

      timing['excel'] = timeit.default_timer() - start_excel

      print(timing)

      return
    
    ######################################################
    ### get all interactions related to the input IDs
    ######################################################
    start_initial = timeit.default_timer()

    database = SCRIPT_DIR+"/data/DB_genes_and_interactions.db"
    conn = create_connection(database)

    # get all interactions in which the given genes takes part
    placeholders = ', '.join('?' for unused in primary_nodes) # '?, ?, ?, ...'

    # The query differs based on whether we need to subselect on the 'type' of interaction
    if len(split_types) == 3:
      query = "SELECT source,target FROM interactions WHERE ( (source IN (%s) or target IN (%s)) and num_experiments >= (%s) \
        and num_publications >= (%s) and num_methods >= (%s))" % (placeholders,placeholders,min_exp,min_pub,min_methods)
      cursor = conn.execute(query,primary_nodes+primary_nodes)
    else:
      placeholders_type = ', '.join('?' for unused in split_types)
      query = "SELECT source,target FROM interactions WHERE ( (source IN (%s) or target IN (%s)) AND type IN (%s) \
        AND num_experiments >= (%s) and num_publications >= (%s) and num_methods >= (%s))" % (placeholders,placeholders, \
        placeholders_type,min_exp,min_pub,min_methods)
      cursor = conn.execute(query,primary_nodes+primary_nodes+split_types)

    # construct dataframe of interacting genes: the nodes
    node_list = list(set([x for y in cursor for x in y])) # get rid of duplicates of which there will be many

    if len(node_list) == 0:
      raise ValueError('No interactions matching these conditions.')

    # get the info from the database for each node to make the 'nodes' dataframe
    if 'No_data' in expression:
      query = """SELECT standard_name,systematic_name,name_desc,desc,go_term_1,go_term_2,\
                            GFP_abundance,GFP_localization,CYCLoPs_Excel_string,CYCLoPs_html,expression_peak_phase,\
                            expression_peak_time,CYCLoPs_dict FROM genes \
                            WHERE standard_name in (%s) AND (standard_name in (%s) OR expression_peak_phase in (%s) OR expression_peak_phase is NULL) AND (standard_name in (%s) OR go_term_1 in (%s) OR go_term_2 in (%s))""" \
                            % (', '.join('?' for _ in node_list), ', '.join('?' for _ in primary_nodes), ', '.join('?' for _ in expression), ', '.join('?' for _ in primary_nodes),', '.join('?' for _ in process),', '.join('?' for _ in process))
    
    else:
      query = """SELECT standard_name,systematic_name,name_desc,desc,go_term_1,go_term_2,\
                            GFP_abundance,GFP_localization,CYCLoPs_Excel_string,CYCLoPs_html,expression_peak_phase,\
                            expression_peak_time,CYCLoPs_dict FROM genes \
                            WHERE standard_name in (%s) AND (standard_name in (%s) OR expression_peak_phase in (%s)) AND (standard_name in (%s) OR go_term_1 in (%s) OR go_term_2 in (%s))""" \
                            % (', '.join('?' for _ in node_list), ', '.join('?' for _ in primary_nodes), ', '.join('?' for _ in expression), ', '.join('?' for _ in primary_nodes), ', '.join('?' for _ in process),', '.join('?' for _ in process))
    cursor = conn.execute(query,node_list+primary_nodes+expression+primary_nodes+process+process)

    data = [list(l) for l in cursor] # cursor itself is a generator, this is a list of lists
    nodes = pd.DataFrame(data,columns=['Standard name','Systematic name','Name description','Description',
                        'GO term 1','GO term 2','GFP abundance','GFP localization','CYCLoPs_Excel_string',
                        'CYCLoPs_html','Expression peak phase','Expression peak time','CYCLoPs_dict'])
    
    timing['Get node information from database'] = timeit.default_timer() - start_initial

    ### make actual dictionaries out of CYCLoPs_dict column
    nodes['CYCLoPs_dict'] = nodes['CYCLoPs_dict'].apply(ast.literal_eval)
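    # After literal_eval each entry is expected to be a nested dict, roughly
    # {'WT1': {'Nucleus': 0.8, 'Cytoplasm': 0.2}, 'WT2': {...}, ...}
    # (illustrative compartments/values); the clustering/coloring code below
    # picks the compartment with the highest value per WT.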

    len_nodes_query = len(nodes)

    ######################################################
    ### BASED ON THE COMPARTMENT FILTER: DROP NODES
    ######################################################
    start_node_drop = timeit.default_timer()

    if 'GFP:' in compartment:
      comp_to_check = compartment.replace('GFP:','')
      print('Prior to compartment filtering:', len(nodes), 'nodes. Filtering on', comp_to_check)
      s = pd.Series([comp_to_check in x for x in nodes['GFP localization'].str.split(', ')])
      nodes = nodes[s.values]
      nodes = nodes.reset_index(drop=True)
      print('After compartment filtering:', len(nodes), 'nodes.')
    elif 'CYCLoPs:' in compartment:
      comp_to_check = compartment.replace('CYCLoPs:','')
      print('Prior to compartment filtering:', len(nodes), 'nodes. Filtering on', comp_to_check)
      l_o_l = [[list(nodes.iloc[i]['CYCLoPs_dict'][x].keys()) for x in list(nodes.iloc[i]['CYCLoPs_dict'].keys()) ] for i in range(len(nodes)) ]
      s = pd.Series([comp_to_check in [v for WT in l_o_l[i] for v in WT] for i in range(len(l_o_l))]) 
      nodes = nodes[s.values]
      nodes = nodes.reset_index(drop=True)
      print('After compartment filtering:', len(nodes), 'nodes.')
    else: #it is 'Any'
      pass

    ### Combine the expression columns
    nodes['Expression peak'] = nodes['Expression peak phase'] + " (" + nodes['Expression peak time'].map(str) + " min)"
    nodes['Expression peak'] = nodes['Expression peak'].mask(nodes['Expression peak'].isnull(), "No data")

    # alphabetize
    nodes = nodes.sort_values(by='Standard name',ascending=True)
    nodes = nodes.reset_index(drop=True)
    node_list = list(nodes['Standard name'].values)

    nodes['primary node'] = [x in primary_nodes for x in nodes['Standard name']]

    if len(nodes) == 0:
      raise ValueError("Filtering left no nodes.")

    timing['Node filter: compartment'] = timeit.default_timer() - start_node_drop


    ######################################################
    # Clustering and coloring
    ######################################################
    start = timeit.default_timer()

    ### Clustering part
    if cluster_by in ['GO term 1','GO term 2']:
      nodes['cluster'] = nodes[cluster_by]
    elif 'CYCLoPs WT' in cluster_by:
      WT_string = 'WT' + cluster_by[-1]

      # Loop over all nodes and find their highest-expression compartment for the WT given by WT_string.
      # NOTE: the expression dictionary for a given WT is sometimes empty, so we need to check for this
      # (example: GET1 in WT1).
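      # For readability, the comprehension below is roughly equivalent to this sketch
      # (illustrative only, using the same column and key names):
      #   l_max_comps = []
      #   for d in nodes['CYCLoPs_dict']:
      #       if isinstance(d, str) or len(d[WT_string]) == 0:
      #           l_max_comps.append('No data')
      #       else:
      #           l_max_comps.append(max(d[WT_string], key=d[WT_string].get))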
      l = nodes['CYCLoPs_dict'].values
      l_max_comps = [ max(l[i][WT_string], key=lambda key: l[i][WT_string][key]) if (type(l[i]) != str and len(l[i][WT_string]) > 0) else 'No data' for i in range(len(nodes))]
      nodes['cluster'] = pd.Series(l_max_comps).values
    elif cluster_by == 'No clustering':
      nodes['cluster'] = ['No clustering' for i in range(len(nodes))]
    else:
      raise SystemExit(f"Unexpected value for clustering variable: {cluster_by}.")
    
    if color_by in ['GO term 1','GO term 2']:
      # set the color based on the color_by variable in a new column of 'nodes' DF
      nodes['color'] = nodes[color_by]

    elif 'CYCLoPs WT' in color_by:
      WT_string = 'WT' + color_by[-1]

      # Loop over all nodes and find their highest-expression compartment for the WT given by WT_string.
      # NOTE: the expression dictionary for a given WT is sometimes empty, so we need to check for this
      # (example: GET1 in WT1). See the sketch in the clustering branch above.
      l = nodes['CYCLoPs_dict'].values
      l_max_comps = [ max(l[i][WT_string], key=lambda key: l[i][WT_string][key]) if \
        (type(l[i]) != str and len(l[i][WT_string]) > 0) else 'No data' for i in range(len(nodes))]

      # set the color based on the maximum compartment found above in a new column in the nodes DF          
      nodes['color'] = pd.Series(l_max_comps).values
    elif color_by == "Peak expression phase":
      nodes['color'] = nodes['Expression peak phase']
    elif color_by == 'No coloring':
      nodes['color'] = ["No data" for i in range(len(nodes))]
    else:
      raise SystemExit(f"Unexpected value for coloring variable: {color_by}.")

    # now we can drop expression peak phase/time as separate fields
    nodes = nodes.drop('Expression peak phase', axis=1)
    nodes = nodes.drop('Expression peak time', axis=1)
    
    timing['Setting node cluster and color attributes'] = timeit.default_timer() - start

    len_nodes_filtered_comp = len(nodes)

    ######################################################
    ### GET ALL INTERACTIONS BETWEEN ALL NODES
    ######################################################
    start_final_sql = timeit.default_timer()
    max_interactions = 10000 # too high a value here can make the server run out of memory, and this is the most time-expensive step on the server
    placeholders = ', '.join('?' for unused in node_list) # '?, ?, ?, ...'
    placeholders_primary_nodes = ', '.join('?' for unused in primary_nodes)

    # Multiple query options.
    # If more than max_interactions interactions satisfy the criteria, then ORDER BY:
    # - pick interactions involving primary_nodes first
    # - pick regulation/physical over genetic
    # - pick more over less: experiments, publications, methods
    # - pick regulation over physical when equal in experiments/publications/methods, because regulatory
    #   interactions often have only a single entry for these. See the CASE sketch below.
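    # A sketch of the intended type ranking in SQLite (illustrative only; the queries
    # below build the full ORDER BY around it):
    #   ORDER BY
    #     CASE WHEN type IN ('physical', 'regulation') THEN 1
    #          WHEN type = 'genetic' THEN 2 END ASC,
    #     num_experiments DESC, num_publications DESC, num_methods DESC,
    #     CASE type WHEN 'regulation' THEN 1 WHEN 'physical' THEN 2 WHEN 'genetic' THEN 3 END ASC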
    if len(split_types) == 3:
      query = "SELECT * FROM interactions \
        WHERE ( (source IN (%s) AND target IN (%s)) \
        AND num_experiments >= (%s) AND num_publications >= (%s) AND num_methods >= (%s)) \
        ORDER BY \
        CASE WHEN ((source IN (%s)) OR (target IN (%s))) THEN 1 ELSE 2 END ASC, \
        CASE WHEN type IN ('physical', 'regulation') THEN 1 WHEN type = 'genetic' THEN 2 END ASC, \
        num_experiments DESC, num_publications DESC, num_methods DESC, \
        CASE type WHEN 'regulation' THEN 1 WHEN 'physical' THEN 2 WHEN 'genetic' THEN 3 END ASC \
        limit (%s)" \
        % (placeholders,placeholders,min_exp,min_pub,min_methods,placeholders_primary_nodes,placeholders_primary_nodes,max_interactions)
      
      interactome = pd.read_sql_query(query, conn, params=node_list+node_list+primary_nodes+primary_nodes)
    
    else:
      placeholders_type = ', '.join('?' for unused in split_types)
      query = "SELECT * FROM interactions \
        WHERE ( (source IN (%s) AND target IN (%s)) AND type IN (%s) \
        AND num_experiments >= (%s) and num_publications >= (%s) and num_methods >= (%s)) \
        ORDER BY \
        CASE WHEN ((source IN (%s)) OR (target IN (%s))) THEN 1 ELSE 2 END ASC, \
        CASE WHEN type IN ('physical', 'regulation') THEN 1 WHEN type = 'genetic' THEN 2 END ASC, \
        num_experiments DESC, num_publications DESC, num_methods DESC, \
        CASE type WHEN 'regulation' THEN 1 WHEN 'physical' THEN 2 WHEN 'genetic' THEN 3 END ASC \
        limit (%s)" \
        % (placeholders, placeholders,placeholders_type,min_exp,min_pub,min_methods,placeholders_primary_nodes,placeholders_primary_nodes,max_interactions)
      
      interactome = pd.read_sql_query(query, conn, params=node_list+node_list+split_types+primary_nodes+primary_nodes)

    interactome.columns = ['source','target','type','Evidence','Evidence HTML','#Experiments',\
        '#Publications','#Methods']
    timing['Interactome SQL + dataframe + processing'] = timeit.default_timer() - start_final_sql


    ######################################################
    ### BASED ON THE METHOD TYPES FILTER: DROP INTERACTIONS
    ######################################################
    start = timeit.default_timer()

    to_drop = []
    with open(SCRIPT_DIR+'/data/unique_experimental_methods.txt') as f:
      read_methods = f.read().splitlines()
    total_methods = len(read_methods)
    if len(method_types) < total_methods: # some have been deselected
      algorithm_output_str += '<p>' + 'We have on file: ' + str(total_methods) + ' methods. User queried for: ' + str(len(method_types)) + '</p>'
      
      len_before = len(interactome)

      interactome = interactome[interactome.apply(lambda x: find_methods_in_evidence(x['Evidence'],method_types),1)]

      algorithm_output_str += '<p>' + 'We dropped: ' + str(len_before - len(interactome)) + ' interactions based on the methods.' + '</p>'

    if len(interactome) == 0:
      raise ValueError('No interactions matching these conditions.')
    
    timing['Filter based on methods'] = timeit.default_timer() - start
    

    ######################################################
    # Network properties with networkx: 1
    ######################################################
    start = timeit.default_timer()

    df_network = pd.Series(dtype=object)
    df_network['Number of nodes'] = len(nodes)
    df_network['Number of edges'] = len(interactome)

    # use networkx
    nodes, interactome, df_network, G = calc_network_props(primary_nodes, nodes, interactome, df_network, filter_condition)

    df_network = df_network.to_frame()
    df_network = df_network.transpose()

    timing['networkx properties calculation'] = timeit.default_timer() - start


    ######################################################
    # Export the full networkx graph to graph formats (GEXF)
    ######################################################
    start = timeit.default_timer()

    nx.write_gexf(G, SCRIPT_DIR+'/../output/networkx/' + primary_nodes_str + "_" + unique_str + "_full.gexf")

    timing['networkx export'] = timeit.default_timer() - start


    ######################################################
    # Save the full network information
    ######################################################
    start = timeit.default_timer()
    nodes_full = nodes.copy()
    interactome_full = interactome.copy()
    timing['Save full network'] = timeit.default_timer() - start

    ######################################################
    # Pickle the dataframes
    ######################################################
    start = timeit.default_timer()
    filename_base = os.path.abspath(SCRIPT_DIR+'/../output/excel_files/')
    file_id = primary_nodes_str+'_'+unique_str

    df_user_input.to_pickle(filename_base+'/user_input_'+file_id)
    nodes_full.to_pickle(filename_base+'/nodes_'+file_id)
    interactome_full.to_pickle(filename_base+'/interactome_'+file_id)
    timing['Pickle full network'] = timeit.default_timer() - start


    ######################################################
    # WRITE "FULL" NETWORK TO JSON
    # this will include a filtering step for really big networks
    ######################################################
    start_json = timeit.default_timer()
    write_network_to_json(nodes_full,interactome_full,filter_condition,output_filename,G,'full',primary_nodes)
    timing['json_full'] = timeit.default_timer() - start_json

    ######################################################
    # FILTER NODES TO MANAGEABLE VISUALIZATION
    ######################################################

    if filter_flag:
      start_filter = timeit.default_timer()
      len_interactome = len(interactome)

      # reduce nodes
      nodes = nodes.sort_values(by=['primary node',filter_condition],ascending=False)
      nodes = nodes.iloc[:max_nodes]
      nodes.reset_index(drop=True,inplace=True)

      # reduce interactions 
      n = nodes['Standard name'].values # list of remaining node IDs
      interactome = interactome[ (interactome['source'].isin(n)) & (interactome['target'].isin(n)) ]
      interactome.reset_index(drop=True,inplace=True)

      # SHOW WARNING MESSAGE ABOUT FILTER STEP
      filter_message = "Note: this query returned {} nodes and {} interactions. We reduced the network to {} nodes based on {} resulting in {} interactions. \
                      All interactions and nodes are contained in the <i>full</i> Excel file. ".format(len_nodes_filtered_comp,len_interactome,max_nodes,filter_condition,len(interactome))
      s = filter_message

      print("<script>create_alert(\""+s+"\",\"alert-warning\");</script>")

      timing['filter'] = timeit.default_timer() - start_filter

      ######################################################
      # Network properties with networkx: 2
      ######################################################
      start = timeit.default_timer()
      
      # df_network = pd.Series()
      df_network['Number of nodes'] = len(nodes)
      df_network['Number of edges'] = len(interactome)

      # use networkx
      nodes, interactome, df_network, G = calc_network_props(primary_nodes, nodes, interactome, df_network, filter_condition)

      timing['networkx properties calculation'] += timeit.default_timer() - start

      ######################################################
      # Export the filtered networkx graph to graph formats (GEXF)
      ######################################################
      start = timeit.default_timer()

      nx.write_gexf(G, SCRIPT_DIR+'/../output/networkx/' + primary_nodes_str + "_" + unique_str + ".gexf")

      timing['networkx export'] += timeit.default_timer() - start

      ######################################################
      # Nxviz image generation: matrixplot
      ######################################################
      start = timeit.default_timer()

      c = nv.MatrixPlot(G)
      c.draw()
      plt.savefig(SCRIPT_DIR+'/../output/nxviz/matrix_' + unique_str + '.png')

      timing['nxviz matrix plot'] = timeit.default_timer() - start


    ######################################################
    ### Write the network to json
    ######################################################
    start_json = timeit.default_timer()
    write_network_to_json(nodes,interactome,filter_condition,output_filename,G)
    timing['json'] = timeit.default_timer() - start_json

    # drop the plain-text Evidence column and rename the HTML version to Evidence
    interactome = interactome.drop('Evidence', axis=1)
    interactome = interactome.rename(columns={'Evidence HTML':'Evidence'})

    if not excel_flag:
      ######################################################
      ### End output text alert div
      ######################################################
      print("</div>")

      ######################################################
      # Generate strings for the nodes and interactome dataframes to print
      ######################################################
      start_print = timeit.default_timer()

      # drop columns
      nodes = nodes.drop(['Description','CYCLoPs_Excel_string','CYCLoPs_dict','cluster','color'], axis=1)

      # Add HTML links to database/SGD to symbols
      nodes['Standard name'] = nodes['Standard name'].apply(lambda x: "<a href='index.php?id=database&gene=" + x + "' target='blank'>" + x + "</a>")

      # change CYCLoPs column name and export html
      # escape makes the HTML links work
      nodes = nodes.rename(columns={'CYCLoPs_html':'CYCLoPs'})
      nodes = nodes.to_html(escape=False,index=False,classes=['table','table-condensed','table-bordered'])
      nodes = nodes.replace('<table','<table id=\"proteins_table\"',1)

      interactome['source'] = interactome['source'].apply(lambda x: "<a href='index.php?id=database&gene=" + x + "' target='blank'>" + x + "</a>" )
      interactome['target'] = interactome['target'].apply(lambda x: "<a href='index.php?id=database&gene=" + x + "' target='blank'>" + x + "</a>")

      # escape makes the HTML links work
      interactome = interactome.to_html(escape=False,index=False,classes=['table','table-condensed','table-bordered'])
      interactome = interactome.replace('<table','<table id=\"interactions_table\"',1)

      ######################################################
      # PRINT COLLAPSIBLE BOOTSTRAP HTML CODE WITH THE DATAFRAMES
      ######################################################
      # the 'in' class makes a collapse panel open by default: the interactions panel here
      print("""
        <div class="panel-group" id="accordion">
          <div class="panel panel-default">
            <div class="panel-heading">
              <h4 class="panel-title">
                <a data-toggle="collapse" data-parent="#accordion" href="#collapse1">
                User input</a>
              </h4>
            </div>
            <div id="collapse1" class="panel-collapse collapse">
              <div class="panel-body">
                <div class="table-responsive">
              """)
      print(df_user_input_to_print)
      print("""
                </div>
              </div>
            </div>
          </div>
          <div class="panel panel-default">
            <div class="panel-heading">
              <h4 class="panel-title">
                <a data-toggle="collapse" data-parent="#accordion" href="#collapse2">
                Network properties</a>
              </h4>
            </div>
            <div id="collapse2" class="panel-collapse collapse">
              <div class="panel-body">
                <div class="table-responsive">
              """)
      print(df_network.to_html(classes=['table','table-condensed','table-bordered'],index=False))
      print("""
                </div>
              </div>
            </div>
          </div>
          <div class="panel panel-default">
            <div class="panel-heading">
              <h4 class="panel-title">
                <a data-toggle="collapse" data-parent="#accordion" href="#collapse3">
                Network nodes (proteins)</a>
              </h4>
            </div>
            <div id="collapse3" class="panel-collapse collapse">
              <div class="panel-body">
                Use the search utility to find the gene you are looking for. The table scrolls horizontally and vertically. 
                By clicking the column headers the table will be sorted on that column. Use shift+click to sort on multiple columns. 
                Default sorting is on number of experiments, number of publications, number of methods and alphabetical on standard name, in that order.
                <div class="table-responsive">
                """)
      print(nodes)
      print("""
                </div>
              </div>
            </div>
          </div>
          <div class="panel panel-default">
            <div class="panel-heading">
              <h4 class="panel-title">
                <a data-toggle="collapse" data-parent="#accordion" href="#collapse4">
                Interactions</a>
              </h4>
            </div>
            <div id="collapse4" class="panel-collapse collapse">
              <div class="panel-body">
                Use the search utility to find the gene you are looking for. 
                By clicking the column headers the table will be sorted on that column. Use shift+click to sort on multiple columns. 
                Default sorting is on number of experiments, number of publications, number of methods and alphabetical on standard name, in that order.
                <div class="table-responsive">
              """)
      print(interactome)
      print("""
                </div>
              </div>
            </div>
          </div>
          """)
      

      ######################################################
      # Optional diagnostics
      ######################################################
      print("""
        <div class="panel panel-default">
          <div class="panel-heading">
            <h4 class="panel-title">
              <a data-toggle="collapse" data-parent="#accordion" href="#collapse5">
              Diagnostics: calculation time</a>
            </h4>
          </div>
          <div id="collapse5" class="panel-collapse collapse">
            <div class="panel-body">
              <div class="table-responsive">
      """)
      timing['print frames'] = timeit.default_timer() - start_print
      timing['all'] = timeit.default_timer() - start_all
      df_timing = pd.Series(timing)
      df_timing = df_timing.to_frame()
      df_timing.columns = ['Time']
      df_timing['Percentage'] = [v/timing['all']*100 for v in df_timing['Time'] ]
      print(df_timing.sort_values('Percentage').to_html(classes=['table','table-condensed','table-bordered']))
      print("Accounted for:", sum([timing[k] for k in timing if k != 'all' ])/timing['all'] * 100, "percent of the time spent in Python.")
      print("""
                </div>
              </div>
            </div>
          </div>
        </div>
        """)


      ######################################################
      # Show algorithm output in an alert at the bottom of the page
      ######################################################
      if algorithm_output_str != '':
        print("<div class=\"alert alert-dismissable alert-info\">")
        print(algorithm_output_str)
        print("</div>")
Example #51
    """Step3. Save all the results"""

    if not os.path.exists(args.outputDir):
        os.makedirs(args.outputDir)

    pvalues = {}
    pvalues['Gpvals'] = gpvals.tolist()
    pvalues['clu_pvals'] = clu_pvals.tolist()
    pvalues['Lpvals_fdr'] = lpvals_fdr.tolist()

    with open(os.path.join(args.outputDir, 'pvalues.json'), 'w') as outfile:
        json.dump(pvalues, outfile)

    efit = {}
    efit['efitBetas'] = efit_beta.tolist()
    efit['efitYdesign'] = efity_design.tolist()
    efit['efitEtas'] = efit_eta.tolist()

    with open(os.path.join(args.outputDir, 'efit.json'), 'w') as outfile:
        json.dump(efit, outfile)


if __name__ == '__main__':

    args = parser.parse_args()

    start_all = timeit.default_timer()
    run_script(args)
    stop_all = timeit.default_timer()
    delta_time_all = str(stop_all - start_all)
    print("The total elapsed time is " + delta_time_all)
	def multiply_by_Z_viaMKL( self, x ):
		'''Multiplies the vector passed as argument by the matrix Z'''
		code = 'multiply_by_Z_viaMKL'
		start = default_timer()

		# Dissecting the "cspblas_dcsrgemv" name:
		# "c" - for "c-blas" like interface (as opposed to fortran)
		#    Also means expects sparse arrays to use 0-based indexing, which python does
		# "sp"  for sparse
		# "d"   for double-precision
		# "csr" for compressed row format
		# "ge"  for "general", e.g., the matrix has no special structure such as symmetry
		# "mv"  for "matrix-vector" multiply

		A = self.data_obj.Z

		if not sparse.isspmatrix_csr(A):
			raise Exception("Matrix must be in csr format")
	        
		(m,n) = A.shape

		# # The data of the matrix
		# data    = A.data.ctypes.data_as(POINTER(c_double))
		# indptr  = A.indptr.ctypes.data_as(POINTER(c_int))
		# indices = A.indices.ctypes.data_as(POINTER(c_int))

		# Allocate output, using same conventions as input
		nVectors = 1
		if x.ndim == 1:
			y = np.empty(m,dtype=np.double,order='F')
			if x.size != n:
				raise Exception("x must have n entries. x.size is %d, n is %d" % (x.size,n))
		elif x.shape[1] == 1:
			y = np.empty((m,1),dtype=np.double,order='F')
			if x.shape[0] != n:
				raise Exception("x must have n entries. x.size is %d, n is %d" % (x.size,n))
		else:
			nVectors = x.shape[1]
			y = np.empty((m,nVectors),dtype=np.double,order='F')
			if x.shape[0] != n:
				raise Exception("x must have n entries. x.size is %d, n is %d" % (x.size,n))

		# Check input
		if x.dtype.type is not np.double:
			x = x.astype(np.double,copy=True)
	        
		# Put it in column-major order, otherwise for nVectors > 1 this fails completely
		if not x.flags['F_CONTIGUOUS']:
			x = x.copy(order='F')

		if nVectors == 1:
			np_x = x.ctypes.data_as(POINTER(c_double))
			np_y = y.ctypes.data_as(POINTER(c_double))
			# now call MKL. This returns the answer in np_y, which links to y
			self.SpMV(byref(c_char(b"N")), byref(c_int(m)), self.Z_data , self.Z_indptr, self.Z_indices, np_x, np_y) 
		else:
			for columns in range(nVectors):
				xx = x[:, columns]
				yy = y[:, columns]
				np_x = xx.ctypes.data_as(POINTER(c_double))
				# np_y aliases the corresponding column of y, so MKL writes the result in place
				np_y = yy.ctypes.data_as(POINTER(c_double))
				self.SpMV(byref(c_char(b"N")), byref(c_int(m)), self.Z_data, self.Z_indptr, self.Z_indices, np_x, np_y)

		end = default_timer()
		time_elapsed = end - start
		self.update_time(code, time_elapsed)

		return y
def main():
   bstTimeStart = timeit.default_timer()
   bst(A)
   bstTimeEnd = timeit.default_timer()
   print("The BST comparison count is:", bstCount)
   print("The BST completion time is:", (bstTimeEnd - bstTimeStart) * 1000)
Example #54
import sys
import timeit

from pyspark import SparkConf

from pyspark.ml import Pipeline
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.feature import StringIndexer, VectorIndexer, VectorAssembler
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

from pyspark.ml.feature import PCA
from pyspark.ml.linalg import Vectors

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

if __name__ == "__main__":

    timerstart = timeit.default_timer()

    #Check arguments
    if len(sys.argv) != 3:
        print("Usage: train.py <csv dataset> <model output path>",
              file=sys.stderr)
        sys.exit(-1)

    # Create spark session
    spark = SparkSession\
        .builder\
        .appName("TrainDecisionTreeIDS-Python")\
        .getOrCreate()

    # Define dataset schema, dataset csv generated by flowtbag https://github.com/DanielArndt/flowtbag
    schema = StructType([
Example #55
def main(opts):
    adj2_ = torch.from_numpy(graph.cihp2pascal_nlp_adj).float()
    adj2_test = adj2_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7,
                                                       20).cuda().transpose(
                                                           2, 3)

    adj1_ = Variable(
        torch.from_numpy(graph.preprocess_adj(graph.pascal_graph)).float())
    adj3_test = adj1_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7, 7).cuda()

    cihp_adj = graph.preprocess_adj(graph.cihp_graph)
    adj3_ = Variable(torch.from_numpy(cihp_adj).float())
    adj1_test = adj3_.unsqueeze(0).unsqueeze(0).expand(1, 1, 20, 20).cuda()

    p = OrderedDict()  # Parameters to include in report
    p['trainBatch'] = opts.batch  # Training batch size
    p['nAveGrad'] = 1  # Average the gradient of several iterations
    p['lr'] = opts.lr  # Learning rate
    p['lrFtr'] = 1e-5
    p['lraspp'] = 1e-5
    p['lrpro'] = 1e-5
    p['lrdecoder'] = 1e-5
    p['lrother'] = 1e-5
    p['wd'] = 5e-4  # Weight decay
    p['momentum'] = 0.9  # Momentum
    p['epoch_size'] = 10  # How many epochs to change learning rate
    p['num_workers'] = opts.numworker
    backbone = 'xception'  # Use xception or resnet as feature extractor,

    with open(opts.txt_file, 'r') as f:
        img_list = f.readlines()

    max_id = 0
    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    exp_name = os.path.dirname(os.path.abspath(__file__)).split('/')[-1]
    runs = glob.glob(os.path.join(save_dir_root, 'run', 'run_*'))
    for r in runs:
        run_id = int(r.split('_')[-1])
        if run_id >= max_id:
            max_id = run_id + 1
    # run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    # Network definition
    if backbone == 'xception':
        net = deeplab_xception_transfer.deeplab_xception_transfer_projection_savemem(
            n_classes=opts.classes,
            os=16,
            hidden_layers=opts.hidden_layers,
            source_classes=7,
        )
    elif backbone == 'resnet':
        # net = deeplab_resnet.DeepLabv3_plus(nInputChannels=3, n_classes=7, os=16, pretrained=True)
        raise NotImplementedError
    else:
        raise NotImplementedError

    if gpu_id >= 0:
        net.cuda()

    # net load weights
    if not opts.loadmodel == '':
        x = torch.load(opts.loadmodel)
        net.load_source_model(x)
        print('load model:', opts.loadmodel)
    else:
        print('no model loaded!')

    ## multi scale
    scale_list = [1, 0.5, 0.75, 1.25, 1.5, 1.75]
    testloader_list = []
    testloader_flip_list = []
    for pv in scale_list:
        composed_transforms_ts = transforms.Compose(
            [tr.Scale_(pv),
             tr.Normalize_xception_tf(),
             tr.ToTensor_()])

        composed_transforms_ts_flip = transforms.Compose([
            tr.Scale_(pv),
            tr.HorizontalFlip(),
            tr.Normalize_xception_tf(),
            tr.ToTensor_()
        ])

        voc_val = cihp.VOCSegmentation(split='test',
                                       transform=composed_transforms_ts)
        voc_val_f = cihp.VOCSegmentation(split='test',
                                         transform=composed_transforms_ts_flip)

        testloader = DataLoader(voc_val,
                                batch_size=1,
                                shuffle=False,
                                num_workers=p['num_workers'])
        testloader_flip = DataLoader(voc_val_f,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=p['num_workers'])

        testloader_list.append(copy.deepcopy(testloader))
        testloader_flip_list.append(copy.deepcopy(testloader_flip))

    print("Eval Network")

    if not os.path.exists(opts.output_path + 'cihp_output_vis/'):
        os.makedirs(opts.output_path + 'cihp_output_vis/')
    if not os.path.exists(opts.output_path + 'cihp_output/'):
        os.makedirs(opts.output_path + 'cihp_output/')

    start_time = timeit.default_timer()
    # One testing epoch
    total_iou = 0.0
    net.eval()
    for ii, large_sample_batched in enumerate(
            zip(*testloader_list, *testloader_flip_list)):
        print(ii)
        #1 0.5 0.75 1.25 1.5 1.75 ; flip:
        sample1 = large_sample_batched[:6]
        sample2 = large_sample_batched[6:]
        for iii, sample_batched in enumerate(zip(sample1, sample2)):
            inputs, labels = sample_batched[0]['image'], sample_batched[0][
                'label']
            inputs_f, _ = sample_batched[1]['image'], sample_batched[1][
                'label']
            inputs = torch.cat((inputs, inputs_f), dim=0)
            if iii == 0:
                _, _, h, w = inputs.size()
            # assert inputs.size() == inputs_f.size()

            # Forward pass of the mini-batch
            inputs, labels = Variable(inputs,
                                      requires_grad=False), Variable(labels)

            with torch.no_grad():
                if gpu_id >= 0:
                    inputs, labels = inputs.cuda(), labels.cuda()
                # outputs = net.forward(inputs)
                # pdb.set_trace()
                outputs = net.forward(inputs, adj1_test.cuda(),
                                      adj3_test.cuda(), adj2_test.cuda())
                outputs = (outputs[0] +
                           flip(flip_cihp(outputs[1]), dim=-1)) / 2
                outputs = outputs.unsqueeze(0)

                if iii > 0:
                    outputs = F.interpolate(outputs,
                                            size=(h, w),
                                            mode='bilinear',
                                            align_corners=True)
                    outputs_final = outputs_final + outputs
                else:
                    outputs_final = outputs.clone()
        ################ plot pic
        predictions = torch.max(outputs_final, 1)[1]
        prob_predictions = torch.max(outputs_final, 1)[0]
        results = predictions.cpu().numpy()
        prob_results = prob_predictions.cpu().numpy()
        vis_res = decode_labels(results)

        parsing_im = Image.fromarray(vis_res[0])
        parsing_im.save(opts.output_path +
                        'cihp_output_vis/{}.png'.format(img_list[ii][:-1]))
        cv2.imwrite(
            opts.output_path + 'cihp_output/{}.png'.format(img_list[ii][:-1]),
            results[0, :, :])
        # np.save('../../cihp_prob_output/{}.npy'.format(img_list[ii][:-1]), prob_results[0, :, :])
        # pred_list.append(predictions.cpu())
        # label_list.append(labels.squeeze(1).cpu())
        # loss = criterion(outputs, labels, batch_average=True)
        # running_loss_ts += loss.item()

        # total_iou += utils.get_iou(predictions, labels)
    end_time = timeit.default_timer()
    print('time used for ' + str(ii) + ' is: ' + str(end_time - start_time))

    # Eval
    pred_path = opts.output_path + 'cihp_output/'
    eval_(pred_path=pred_path,
          gt_path=opts.gt_path,
          classes=opts.classes,
          txt_file=opts.txt_file)
Example #56
        matrix[k - 1][j - 1] = 1 + lcs(X, Y, k - 1, j - 1, matrix)
        return matrix[k - 1][j - 1]

    else:  # optimal substructure properties 2 and 3

        # store it in arr to avoid further repetitive
        # work in future function calls
        matrix[k - 1][j - 1] = max(lcs(X, Y, k, j - 1, matrix), lcs(X, Y, k - 1, j, matrix))
        return matrix[k - 1][j - 1]


sys.setrecursionlimit(10000)
n = int(input("Size: "))  # 생성할 문자열의 길이
string_pool = string.ascii_uppercase  # 대문자만 이용하여 문자열 생성
X = ""
Y = ""
for i in range(n):  # 랜덤한 문자열 생성
    X += random.choice(string_pool)  # 랜덤한 문자열 하나 선택
    Y += random.choice(string_pool)  # 랜덤한 문자열 하나 선택

print("X: ", X)  # 입력 수열1 출력
print("Y: ", Y)  # 입력 수열2 출력
n = len(Y)  # 입력 수열의 길이 n

dp = [[-1] * (n+1) for _ in range(n+1)]  # 이전에 계산한 값을 저장할 2차원 리스트 선언

t1 = timeit.default_timer()  # LCS 알고리즘 시작시간
print("Length of LCS:", lcs(X, Y, n, n, dp))  # LCS 함수 호출 및 반환값 출력
t2 = timeit.default_timer()  # LCS 알고리즘 종료시간
print("Running time: ", (t2 - t1) * 1000)  # 삽입 정렬 실행시간
Example #57
 def __enter__(self):
     self._start = default_timer()
Example #58
             conn_spec=conn_dict)
A = mmread('../ii.wmat')
#rows, cols = A.nonzero()
nest.Connect(A.row + NE + 1,
             A.col + NE + 1,
             syn_spec=inh_syn_dict,
             conn_spec=conn_dict)

if (not fast):
    spikes = nest.Create("spike_detector", 1, [{
        "label": "va-py-ex",
        "to_file": True
    }])
    spikes_E = spikes[:1]
    nest.Connect(nodes_E[:N_rec], spikes_E)

starttime = timeit.default_timer()
nest.Simulate(simtime)
totaltime = timeit.default_timer() - starttime
print("Real Time Sim: " + str(totaltime) + "s")

if (fast):
    f = open("timefile.dat", "w")
    f.write("%f" % totaltime)
    f.close()

if (not fast):
    rate_iaf = nest.GetStatus(spikes)[0]["n_events"] / (
        (simtime / 1000.0) * N_rec)
    print("Average Rate of recorded electrodes: " + str(rate_iaf) + "Hz")
Example #59
 def __exit__(self, typ, value, traceback):
     # Time can go backwards.
     self._gauge.set(max(default_timer() - self._start, 0))
Example #60
 def __exit__(self, typ, value, traceback):
     # Time can go backwards.
     self._histogram.observe(max(default_timer() - self._start, 0))
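
# For reference, a self-contained sketch that combines the __enter__/__exit__ fragments
# above into one reusable timer (plain Python, no metrics client; the max(..., 0) guard
# mirrors the "time can go backwards" note):
import timeit

class ElapsedTimer:
    """Context manager that measures wall-clock time with timeit.default_timer()."""

    def __enter__(self):
        self._start = timeit.default_timer()
        return self

    def __exit__(self, typ, value, traceback):
        # Clamp at zero because the timer source can, in rare cases, go backwards.
        self.elapsed = max(timeit.default_timer() - self._start, 0)
        return False  # do not suppress exceptions

# Usage:
#   with ElapsedTimer() as t:
#       do_work()
#   print("took %.3f s" % t.elapsed)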