def __init__(self):
    """Initialise clocks, server state, queues and statistics buffers."""
    # Event clocks: master clock, next RT arrival, next nonRT arrival and
    # completion time of the job currently in service.
    self.mc, self.rtcl, self.nrtcl, self.scl = 0, 3, 5, 4

    # The server starts busy (state 2) with a job built as Node(2, 0, 4);
    # presumably (type, arrival time, service time) -- confirm against Node.
    self.server = 2
    self.current = Node(2, 0, 4)

    # Waiting lines for the two job classes.
    self.rt_queue = deque()
    self.nrt_queue = deque()

    # -1 is the "nothing preempted" marker.
    self.preempt = -1

    # Mean inter-arrival and service times, zero until set elsewhere.
    self.rt_iat = 0
    self.nrt_iat = 0
    self.rt_st = 0
    self.nrt_st = 0

    # Results
    self.m, self.b = 0, 0
    self.nrt, self.nnrt = 0, 0
    self.rt_times, self.nrt_times = [], []
    self.rt_mean, self.nrt_mean = [], []
    self.rt_percentile, self.nrt_percentile = [], []
    self.results = Result()
class Simulation:
    """Event-driven simulation of one server shared by RT and nonRT messages.

    RT jobs (type 1) have priority: an RT arrival preempts a nonRT job
    (type 2) that is in service, and the preempted job is put back at the
    head of the nonRT queue with its remaining service time.  Response
    times are collected in ``m`` batches of ``b`` jobs per class; batch 0
    is discarded before confidence intervals are computed.

    ``self.server`` is 0 when idle, 1 while serving an RT job and 2 while
    serving a nonRT job.  ``self.preempt`` holds the remaining service time
    of a preempted nonRT job, or -1 when there is none.
    """

    # Multiplier for the confidence-interval half width.
    # NOTE(review): presumably the two-sided 95% Student-t critical value
    # for about 50 degrees of freedom -- confirm against the batch count.
    T_CRITICAL = 2.0086

    def __init__(self):
        # Model parameters, filled in by get_inputs().
        self.rt_iat = self.nrt_iat = self.rt_st = self.nrt_st = 0
        self.m = 0
        self.b = 0
        # Results collected across every run; never cleared by reset().
        self.results = Result()
        # Clocks, queues and per-run statistics share one definition.
        self.reset(self.m)

    def get_inputs(self):
        """Read the model parameters and batch settings from standard input."""
        print("\nEnter following details: ")
        self.rt_iat = float(
            input("Enter mean inter-arrival time of RT messages: "))
        self.nrt_iat = float(
            input("Enter mean inter-arrival time of nonRT messages: "))
        self.rt_st = float(input("Enter mean service time of RT message: "))
        self.nrt_st = float(
            input("Enter mean service time of non RT message: "))
        self.m = int(input("Enter number of batches: "))
        self.b = int(input("Enter batch size: "))

    def calculate_time(self, t):
        """Return a pseudo-random time ``-t * ln(u)``, rounded to 4 decimals.

        ``random.random()`` lies in [0.0, 1.0), so ``u = 1 - random()`` lies
        in (0.0, 1.0] and the logarithm is always defined.
        """
        return round(-t * log(1.0 - random.random()), 4)

    def _start_service(self, queue, server_state):
        """Move the head of ``queue`` into service and schedule its completion."""
        self.current = queue.popleft()
        self.scl = round(self.mc + self.current.time, 4)
        self.server = server_state

    def rt_arrival(self):
        """Handle an RT arrival: enqueue it, schedule the next one, maybe preempt."""
        self.mc = self.rtcl
        # Draw the service time before the inter-arrival time (same order
        # of random draws as the job creation implies).
        self.rt_queue.append(Node(1, self.mc, self.calculate_time(self.rt_st)))
        self.rtcl = round(self.mc + self.calculate_time(self.rt_iat), 4)

        if self.server == 0:
            # Server idle: the job that just arrived starts immediately.
            self._start_service(self.rt_queue, 1)
        elif self.server == 2:
            # A nonRT job is in service: put it back at the head of the
            # nonRT queue with whatever service time it still needs.
            remaining = self.scl - self.mc
            self.current.time = round(remaining, 4)
            if remaining != 0:
                self.preempt = self.current.time
            self.nrt_queue.appendleft(self.current)
            self._start_service(self.rt_queue, 1)

    def nrt_arrival(self):
        """Handle a nonRT arrival: enqueue it and start it if the server is idle."""
        self.mc = self.nrtcl
        self.nrt_queue.append(
            Node(2, self.mc, self.calculate_time(self.nrt_st)))
        self.nrtcl = round(self.mc + self.calculate_time(self.nrt_iat), 4)
        if self.server == 0:
            self._start_service(self.nrt_queue, 2)

    def service_complete(self):
        """Handle a job completion: record its response time, start the next job."""
        self.mc = self.scl

        # Record the response time unless this class already has a full batch.
        if self.current.type == 1 and self.nrt != self.b:
            self.nrt += 1
            self.rt_times.append(self.mc - self.current.arrival)
        elif self.current.type == 2 and self.nnrt != self.b:
            self.nnrt += 1
            self.nrt_times.append(self.mc - self.current.arrival)

        if self.rt_queue:
            # RT jobs always go first.
            self._start_service(self.rt_queue, 1)
        elif self.nrt_queue:
            # A preempted job sits at the head of this queue, so it resumes
            # first; either way nothing is preempted afterwards.
            self._start_service(self.nrt_queue, 2)
            self.preempt = -1
        else:
            self.current = None
            self.server = 0

    def print_status(self):
        """Print one row describing the current clocks, queues and server."""
        # The completion clock is meaningless while the server is idle.
        completion = '-' if self.server == 0 else str(self.scl)
        data = "{}| {}| {}| {}| {}| {}| {}|".format(
            str(self.mc).ljust(10),
            str(self.rtcl).ljust(10),
            str(self.nrtcl).ljust(10),
            str(len(self.rt_queue)).ljust(10),
            str(len(self.nrt_queue)).ljust(10),
            completion.ljust(10),
            str(self.server).ljust(15))
        if self.preempt != -1:
            data += " s={}".format(str(self.preempt))
        print("".ljust(102, '-'))
        print(data)

    def _confidence_interval(self, samples):
        """Return ``(mean, lower, upper)`` of ``samples`` using ``T_CRITICAL``.

        NOTE(review): ``np.std`` defaults to ddof=0 (population standard
        deviation); a t-based interval normally uses ddof=1 -- confirm
        which one is intended before changing it.
        """
        mean = np.mean(samples)
        half_width = self.T_CRITICAL * (np.std(samples) / sqrt(len(samples)))
        return mean, mean - half_width, mean + half_width

    def calculate_ci(self):
        """Discard batch 0, compute means and intervals, then store and print them."""
        # Pop batch 0
        for series in (self.rt_mean, self.nrt_mean,
                       self.rt_percentile, self.nrt_percentile):
            series.pop(0)

        # Each interval uses the length of its own sample list; the four
        # lists are appended to and popped together, so the lengths agree.
        rt_mean, rtci_lb, rtci_ub = self._confidence_interval(self.rt_mean)
        rt_percentile_mean, rtci_lb2, rtci_ub2 = self._confidence_interval(
            self.rt_percentile)
        nrt_mean, nrtci_lb, nrtci_ub = self._confidence_interval(self.nrt_mean)
        nrt_percentile_mean, nrtci_lb2, nrtci_ub2 = self._confidence_interval(
            self.nrt_percentile)

        # Calculate results for plotting graph
        self.results.nrt_miat.append(self.nrt_iat)
        self.results.rt_mean.append(round(rt_mean, 4))
        self.results.rt_percentile.append(round(rt_percentile_mean, 4))
        self.results.rtci.append((round(rtci_lb, 4), round(rtci_ub, 4)))
        self.results.percentile_rtci.append((round(rtci_lb2, 4),
                                             round(rtci_ub2, 4)))
        self.results.nrt_mean.append(round(nrt_mean, 4))
        self.results.nrt_percentile.append(round(nrt_percentile_mean, 4))
        self.results.nrtci.append((round(nrtci_lb, 4), round(nrtci_ub, 4)))
        self.results.percentile_nrtci.append((round(nrtci_lb2, 4),
                                              round(nrtci_ub2, 4)))

        # Print batch results
        print("\n\nMean Inter-arrival time of nRT message: {}".format(
            self.nrt_iat))
        print("Mean of RT response time: {:0.4f}".format(rt_mean))
        print("95th percentile of RT response time: {:0.4f}".format(
            rt_percentile_mean))
        print("Mean of nRT response time: {:0.4f}".format(nrt_mean))
        print("95th percentile of nRT response time: {:0.4f}".format(
            nrt_percentile_mean))
        print("RT Mean Confidence Interval: {:0.4f} - {:0.4f}".format(
            rtci_lb, rtci_ub))
        print("nRT Mean Confidence Interval: {:0.4f} - {:0.4f}".format(
            nrtci_lb, nrtci_ub))
        print("RT Percentile Confidence Interval: {:0.4f} - {:0.4f}".format(
            rtci_lb2, rtci_ub2))
        print("nRT Percentile Confidence Interval: {:0.4f} - {:0.4f}".format(
            nrtci_lb2, nrtci_ub2))

    def process(self):
        """Run events until ``m`` batches are collected, then compute intervals."""
        while True:
            if self.nrt == self.b and self.nnrt == self.b:
                # Batch complete: summarise it and clear the buffers.
                self.rt_mean.append(round(np.mean(self.rt_times), 4))
                self.nrt_mean.append(round(np.mean(self.nrt_times), 4))
                self.rt_percentile.append(
                    round(np.percentile(self.rt_times, 95), 4))
                self.nrt_percentile.append(
                    round(np.percentile(self.nrt_times, 95), 4))
                self.rt_times = []
                self.nrt_times = []
                self.nrt = self.nnrt = 0
                self.m -= 1
                # "<=" rather than "==": a batch count of zero or less
                # must still terminate instead of looping forever.
                if self.m <= 0:
                    break

            # Find which event occurs next; the completion clock only
            # counts while a job is in service.
            clocks = [self.rtcl, self.nrtcl]
            if self.server != 0:
                clocks.append(self.scl)
            next_event = min(clocks)

            # Ties resolve as RT arrival, then nonRT arrival, then completion.
            if self.rtcl == next_event:
                self.rt_arrival()
            elif self.nrtcl == next_event:
                self.nrt_arrival()
            else:
                self.service_complete()

        # Calculate confidence interval
        self.calculate_ci()

    def reset(self, no_batches):
        """Restore the initial clocks, queues and statistics for a new run.

        Args:
            no_batches: number of batches the next ``process()`` call collects.
        """
        self.mc = 0
        self.rtcl = 3
        self.nrtcl = 5
        self.scl = 4
        # Every run starts with a nonRT job already in service.
        self.server = 2
        self.current = Node(2, 0, 4)
        self.rt_queue = deque()
        self.nrt_queue = deque()
        self.preempt = -1
        self.nrt = 0
        self.nnrt = 0
        self.rt_times = []
        self.nrt_times = []
        self.rt_mean = []
        self.nrt_mean = []
        self.rt_percentile = []
        self.nrt_percentile = []
        self.m = no_batches

    def run(self):
        """Read the inputs, then simulate once per nonRT mean inter-arrival time.

        The sweep starts at the entered value and advances by the
        module-level ``nrt_miat_increments`` until ``max_nrt_miat`` is
        exceeded; the collected results are displayed at the end.
        """
        self.get_inputs()
        no_batches = self.m
        while self.nrt_iat <= max_nrt_miat:
            self.reset(no_batches)
            self.process()
            self.nrt_iat += nrt_miat_increments
        self.results.display()
# Runs DCI classification on the MDS tasks for a range of pivot counts and
# two distributional correspondence functions.
# NOTE(review): `exists`, `abspath` and `MDS_task_generator` are used below
# but not imported in the visible part of this script -- confirm they are
# imported elsewhere.
from time import time
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
import numpy as np
from util.results import Result

optimize = True
mds_home= '../datasets/MDS'
dataset_home='../datasets/Webis-CLS-10'
nfolds=5
outfile = './DCI.varpivot.dat'

# Load the results table from `outfile` when that file exists, otherwise
# start an empty one with these columns.
if exists(outfile):
    rperf = Result.load(outfile, False)
else:
    rperf = Result(['dataset', 'task', 'method', 'fold', 'npivots', 'acc', 'dci_time', 'svm_time'])

# Pivot counts to evaluate.
pivot_range = [10,25,50,100,250,500,1000,1500,2000,2500,5000]

for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds):
    # Select pivots once for the largest count; each smaller setting below
    # reuses a prefix slice of these lists.
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U, source.V, target.V, phi=1, cross=True)
    for npivots in pivot_range:
        for dcf in ['cosine','linear']:
            dci = DCI(dcf=dcf, unify=False, post='normal')
            # NOTE(review): passes the literal True rather than the
            # module-level `optimize` flag defined above.
            acc, dci_time, svm_time, _ = DCIclassify(source, target, s_pivots[:npivots], t_pivots[:npivots], dci, optimize=True)
# Loads the stored training vectors of every vector set under VECTORSPATH.
import numpy as np
from util.file import *
from quantification.helpers import *
from util.plot_correction import plot_corr
from util.results import Result

VECTORSPATH = '../vectors'
sample_size = 200
samples_by_prevalence = 100

# Module-level table that add_results() writes into.
results_table = Result()


def add_results(results, dataset, method, approach):
    """Add every (metric, score) pair of `results` to `results_table`.

    Each row is tagged with the given `dataset`, `method` and `approach`.
    `results` must provide `.items()` yielding (metric, score) pairs.
    """
    for metric, score in results.items():
        results_table.add(dataset=dataset, method=method, approach=approach, metric=metric, score=score)


# `list_dirs` and `join` are not imported by name here; presumably they come
# from one of the star imports above -- verify.
for vectorset_name in list_dirs(VECTORSPATH):
    print(vectorset_name)
    vectorset = join(VECTORSPATH, vectorset_name)

    # Arrays stored for the training split of this vector set.
    train_x = np.load(join(vectorset, 'train.vec.npy'))
    train_y = np.load(join(vectorset, 'train.y.npy'))
    train_y_pred = np.load(join(vectorset, 'train.y_pred.npy'))
    train_y_prob = np.load(join(vectorset, 'train.y_prob.npy'))
# Runs timed pivot selection for every topic task produced from the
# Reuters, SRAA and 20news dataset locations configured below.
from data.tasks_topics import Topic_task_generator
from model.dci import DCI
from experiments.common import DCIinduction, pivot_selection_timed
from util.results import Result

optimize = True
npivots = 1000
dcf = 'cosine'
twentynews_home = '../datasets/20news'
sraa_home = '../datasets/SRAA'
reuters_home = '../datasets/Reuters21578'

# Results table with one column per recorded quantity.
rperf = Result([ 'dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t' ])

for source, target, task, dataset in \
        Topic_task_generator(reuters_home=reuters_home, sraa_home=sraa_home, twenty_home=twentynews_home):
    # The third returned value is kept as the pivot-selection time.
    # NOTE(review): `DCI`, `DCIinduction`, `optimize` and `dcf` are not used
    # in the visible part of this script; the rest of the loop body is
    # presumably outside this view.
    s_pivots, t_pivots, pivot_time = pivot_selection_timed(npivots, source.X, source.y, source.U, target.U, source.V, target.V, phi=1, cross=True, show=min( 10, npivots))
# Starts timed pivot selection for the Webis-CLS-10 cross-domain
# cross-lingual tasks.
from data.tasks import WebisCLS10_task_generator, WebisCLS10_crossdomain_crosslingual_task_generator
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
import os
from time import time
from util.results import Result

dcf = 'cosine'
npivots = 450
optimize = True
dataset_home = '../datasets/Webis-CLS-10'

# Results table with one column per recorded quantity.
rperf = Result([ 'dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t' ])

for source, target, oracle, taskname in WebisCLS10_crossdomain_crosslingual_task_generator( os.path.abspath(dataset_home)):
    # pivot selection
    # `tinit` captures the wall-clock time just before pivot selection.
    tinit = time()
    # NOTE(review): the call below is cut off in this view; its remaining
    # arguments and the rest of the loop body are not visible here.
    s_pivots, t_pivots = pivot_selection(npivots, source.X, source.y, source.U, target.U, source.V, target.V, oracle=oracle, phi=30,
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import LinearSVC
from data.tasks import WebisCLS10_task_generator
from data.domain import unify_feat_space
import os
from util.results import Result

# Values of C explored by every grid search: 10**-5 up to 10**4.
parameters = {'C': [10 ** i for i in range(-5, 5)]}
dataset_home = '../datasets/Webis-CLS-10'
results = Result(['dataset', 'task', 'method', 'acc'])


def _new_grid_search():
    """Build a fresh 5-fold grid search over `parameters` for a LinearSVC."""
    return GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)


for source, target, oracle, taskname in WebisCLS10_task_generator(
        os.path.abspath(dataset_home)):
    # "Upper" baseline: cross-validated accuracy on the target domain itself.
    upper_search = _new_grid_search()
    source, target = unify_feat_space(source, target)
    upper_acc = cross_val_score(upper_search, target.X, target.y, cv=5).mean()
    results.add(dataset='Webis-CLS-10', task=taskname, method='Upper',
                acc=upper_acc)

    # "Lower" baseline: fit on the source domain, evaluate on the target.
    lower_search = _new_grid_search()
    lower_search.fit(source.X, source.y)
    predicted = lower_search.predict(target.X)
    lower_acc = (target.y == predicted).mean()
    results.add(dataset='Webis-CLS-10', task=taskname, method='Lower',
                acc=lower_acc)

results.pivot(grand_totals=True)
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import LinearSVC
from data.domain import unify_feat_space
from data.tasks import MDS_task_generator, UpperMDS_task_generator
from os.path import abspath
from util.results import Result

dataset_home = '../datasets/MDS'
nfolds = 5
# Values of C explored by every grid search: 10**-5 up to 10**4.
parameters = {'C': [10 ** i for i in range(-5, 5)]}
# Cross-validated accuracy per domain name, filled by the first loop.
upper = {}
results = Result(['dataset', 'task', 'method', 'fold', 'acc'])


def _new_grid_search():
    """Build a fresh 5-fold grid search over `parameters` for a LinearSVC."""
    return GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)


# Pass 1: "Upper" score of each domain, from cross-validation inside it.
for domain in UpperMDS_task_generator(abspath(dataset_home)):
    domain_scores = cross_val_score(_new_grid_search(), domain.X, domain.y, cv=5)
    upper[domain.domain] = domain_scores.mean()

# Pass 2: "Lower" score of each source -> target task and fold.
for source, target, fold, taskname in MDS_task_generator(
        abspath(dataset_home), nfolds=nfolds):
    classifier = _new_grid_search()
    # Read the domain name before `target` is rebound by unify_feat_space.
    target_domain_name = target.domain
    source, target = unify_feat_space(source, target)

    classifier.fit(source.X, source.y)
    predicted = classifier.predict(target.X)
    lower_acc = (predicted == target.y).mean()

    results.add(dataset='MDS', task=taskname, method='Lower', fold=fold,
                acc=lower_acc)
    results.add(dataset='MDS', task=taskname, method='Upper', fold=fold,
                acc=upper[target_domain_name])

results.pivot(grand_totals=True)
from sklearn.svm import LinearSVC
from data.domain import *
from sklearn.model_selection import cross_val_score
from data.tasks_topics import Topic_task_generator
from util.results import Result

twentynews_home = '../datasets/20news'
sraa_home = '../datasets/SRAA'
reuters_home = '../datasets/Reuters21578'
results = Result(['dataset', 'task', 'method', 'acc'])

# Keyword arguments handed to the task generator.
_dataset_homes = {
    'reuters_home': reuters_home,
    'sraa_home': sraa_home,
    'twenty_home': twentynews_home,
}

for source, target, task, dataset in Topic_task_generator(**_dataset_homes):
    source, target = unify_feat_space(source, target)

    # "Lower" baseline: fit on the source domain, evaluate on the target.
    classifier = LinearSVC()
    classifier.fit(source.X, source.y)
    predicted = classifier.predict(target.X)
    lower_acc = (predicted == target.y).mean()
    results.add(dataset=dataset, task=task, method='Lower', acc=lower_acc)

    # "Upper" baseline: 5-fold cross-validation on the target domain.
    target_scores = cross_val_score(classifier, target.X, target.y, cv=5)
    upper = target_scores.mean()
    results.add(dataset=dataset, task=task, method='Upper', acc=upper)

results.pivot(grand_totals=True)
# Starts timed pivot selection for every MDS task and fold.
# NOTE(review): `MDS_task_generator` is used below but not imported in the
# visible part of this script -- confirm it is imported elsewhere.
from os.path import abspath
from time import time
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
from util.results import Result

dcf = 'cosine'
npivots = 1000
optimize = True
mds_home = '../datasets/MDS'
nfolds = 5

# Results table with one column per recorded quantity.
rperf = Result([ 'dataset', 'task', 'method', 'fold', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t' ])

for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds):
    # `tinit` captures the wall-clock time just before pivot selection.
    tinit = time()
    # NOTE(review): the call below is cut off in this view; its remaining
    # arguments and the rest of the loop body are not visible here.
    s_pivots, t_pivots = pivot_selection(npivots, source.X, source.y, source.U, target.U, source.V, target.V, phi=1, show=min(10, npivots),
# Sets up shared configuration and a results table, then begins a loop of
# ten iterations that fixes some hyperparameters.
# NOTE(review): `torch` and `Result` are used below but not imported by name
# in the visible part of this script; they may come from the star import or
# from lines outside this view -- verify.
from sklearn.model_selection import train_test_split
import pickle
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from os.path import join
import scipy
from util.file import *

cuda = torch.device('cuda')
VECTORSPATH = '../vectors'
sample_size = 200
samples_by_prevalence = 100

# Module-level table that add_results() writes into.
results_table = Result()


def add_results(results, dataset, method, approach):
    """Add every (metric, score) pair of `results` to `results_table`.

    Each row is tagged with the given `dataset`, `method` and `approach`.
    `results` must provide `.items()` yielding (metric, score) pairs.
    """
    for metric, score in results.items():
        results_table.add(dataset=dataset, method=method, approach=approach, metric=metric, score=score)


# NOTE(review): the loop variable shadows the builtin `iter`; later code
# outside this view may rely on the name, so it is left unchanged here.
for iter in range(10):
    # Values fixed for each iteration; how they are consumed is not visible
    # in this view.
    hidden = 64
    drop_p = 0.0
    layers = 2
from experiments.common import pivot_selection_timed, DCItransduction
from model.dci import DCI
import os
from util.results import Result

# Experiment configuration.
optimize = False
transductive = True
dcf = 'cosine'
npivots = 450
svmlight_home = '../../svm_light'
dataset_home = '../datasets/Webis-CLS-10'

# 'TDCI' when transductive is set, 'IDCI' otherwise.
if transductive:
    methodname = 'TDCI'
else:
    methodname = 'IDCI'

rperf = Result([
    'dataset', 'task', 'method', 'acc',
    'pivot_t', 'dci_t', 'svm_t', 'test_t',
])

for source, target, oracle, taskname in WebisCLS10_task_generator(
        os.path.abspath(dataset_home)):
    # Pivot selection; the third returned value is the time it took.
    s_pivots, t_pivots, pivot_time = pivot_selection_timed(
        npivots,
        source.X, source.y,
        source.U, target.U,
        source.V, target.V,
        oracle=oracle,
        phi=30,
        show=min(20, npivots),
        cross=False,
    )

    dci = DCI(dcf=dcf, unify=True, post='normal')
    outcome = DCItransduction(
        source, target, s_pivots, t_pivots, dci, svmlight_home,
        optimize=optimize, transductive=transductive,
    )
    acc, dci_time, svm_time, test_time = outcome

    # One row per task: accuracy plus the four recorded times.
    rperf.add(
        dataset='Webis-CLS-10',
        task=taskname,
        method=methodname,
        acc=acc,
        pivot_t=pivot_time,
        dci_t=dci_time,
        svm_t=svm_time,
        test_t=test_time,
    )
# SVMlight baselines on MDS: a cross-validated 'UPPER' score per domain,
# then inductive and transductive models per source -> target task.
from sklearn.model_selection import cross_val_score
from data.tasks import MDS_task_generator, UpperMDS_task_generator
from data.domain import *
from os.path import abspath
from classification.svmlight import SVMlight
from util.results import Result

dataset_home='../datasets/MDS'
svmlight_home='../../svm_light'
nfolds=5

# Table for the per-domain 'UPPER' scores.
results = Result(['dataset', 'task', 'method', 'acc'])
for domain in UpperMDS_task_generator(abspath(dataset_home)):
    # SVMlight built with transduction=None, scored by cross-validation
    # inside the domain.
    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None)
    score = cross_val_score(isvm, domain.X, domain.y, cv=nfolds).mean()
    results.add(dataset='MDS', task=domain.domain, method='UPPER', acc=score)
results.pivot(grand_totals=True)

# A new table, with an extra 'fold' column, replaces the one above.
results = Result(['dataset', 'task', 'method', 'fold', 'acc'])
for source, target, fold, task in MDS_task_generator(abspath(dataset_home), nfolds=nfolds):
    # Read the domain names before unify_feat_space rebinds source/target.
    source_name = source.domain
    target_name = target.domain
    source, target = unify_feat_space(source, target)
    # Two models fitted on the source data; they differ only in the
    # `transduction` argument (None versus the target matrix).
    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None).fit(source.X, source.y)
    tsvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=target.X).fit(source.X, source.y)
    yte_ = isvm.predict(target.X)
    # The second model's target labels are read from its
    # `transduced_labels` attribute rather than from predict().
    # NOTE(review): the loop body appears to continue beyond this view; no
    # result is recorded for this loop in the visible part.
    tyte_ = tsvm.transduced_labels