Example #2
import random
from collections import deque
from math import log, sqrt

import numpy as np

# Node and Result are helper classes defined elsewhere in the original
# project and are not shown in this snippet.


class Simulation:
    def __init__(self):
        self.mc = 0
        self.rtcl = 3
        self.nrtcl = 5
        self.scl = 4
        self.server = 2
        self.current = Node(2, 0, 4)
        self.rt_queue = deque([])
        self.nrt_queue = deque([])
        self.preempt = -1
        self.rt_iat = self.nrt_iat = self.rt_st = self.nrt_st = 0
        # Results
        self.m = 0
        self.b = 0
        self.nrt = 0
        self.nnrt = 0
        self.rt_times = []
        self.nrt_times = []
        self.rt_mean = []
        self.nrt_mean = []
        self.rt_percentile = []
        self.nrt_percentile = []
        self.results = Result()

    def get_inputs(self):
        # Get inputs
        print("\nEnter following details: ")
        self.rt_iat = float(
            input("Enter mean inter-arrival time of RT messages: "))
        self.nrt_iat = float(
            input("Enter mean inter-arrival time of nonRT messages: "))
        self.rt_st = float(input("Enter mean service time of RT message: "))
        self.nrt_st = float(
            input("Enter mean service time of non RT message: "))
        self.m = int(input("Enter number of batches: "))
        self.b = int(input("Enter batch size: "))

    def calculate_time(self, t):
        # Draw a pseudo-random, exponentially distributed time with mean t
        time = (-1 * t) * log(random.random())
        time = round(time, 4)
        return time

    def rt_arrival(self):
        # Arrival of RT Event
        self.mc = self.rtcl
        self.rt_queue.append(Node(1, self.mc, self.calculate_time(self.rt_st)))
        self.rtcl = round(self.mc + self.calculate_time(self.rt_iat), 4)

        if len(self.rt_queue) > 0 and self.server == 0:  # RT queue not empty and server idle
            self.current = self.rt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 1
        elif len(self.rt_queue) > 0 and self.server == 2:  # RT queue not empty and server busy with an nRT job
            remaining = self.scl - self.mc
            self.current.time = round(remaining, 4)

            if remaining != 0:  # Preempt the current nRT job
                self.preempt = self.current.time
                self.nrt_queue.appendleft(self.current)

            self.current = self.rt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 1

    def nrt_arrival(self):
        # Arrival of nRT Event
        self.mc = self.nrtcl
        self.nrt_queue.append(
            Node(2, self.mc, self.calculate_time(self.nrt_st)))
        self.nrtcl = round(self.mc + self.calculate_time(self.nrt_iat), 4)

        if len(self.nrt_queue) > 0 and self.server == 0:  # Schedule the job if the server is idle
            self.current = self.nrt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 2

    def service_complete(self):
        # Job completion event
        self.mc = self.scl
        if self.current.type == 1 and self.nrt != self.b:  # Record the RT job's response time
            self.nrt += 1
            self.rt_times.append(self.mc - self.current.arrival)
        elif self.current.type == 2 and self.nnrt != self.b:  # Record the nRT job's response time
            self.nnrt += 1
            self.nrt_times.append(self.mc - self.current.arrival)

        if len(self.rt_queue) > 0:  # RT Queue is not empty
            self.current = self.rt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 1
        elif len(self.nrt_queue) > 0 and self.preempt != -1:  # Schedule the preempted job
            self.current = self.nrt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 2
            self.preempt = -1
        elif len(self.nrt_queue) > 0:  # nRT queue is not empty
            self.current = self.nrt_queue.popleft()
            self.scl = round(self.mc + self.current.time, 4)
            self.server = 2
        else:
            self.current = None
            self.server = 0

    def print_status(self):
        # Print current system status
        if self.server == 0:
            data = "{}| {}| {}| {}| {}| {}| {}|".format(
                str(self.mc).ljust(10),
                str(self.rtcl).ljust(10),
                str(self.nrtcl).ljust(10),
                str(len(self.rt_queue)).ljust(10),
                str(len(self.nrt_queue)).ljust(10), '-'.ljust(10),
                str(self.server).ljust(15))
        else:
            data = "{}| {}| {}| {}| {}| {}| {}|".format(
                str(self.mc).ljust(10),
                str(self.rtcl).ljust(10),
                str(self.nrtcl).ljust(10),
                str(len(self.rt_queue)).ljust(10),
                str(len(self.nrt_queue)).ljust(10),
                str(self.scl).ljust(10),
                str(self.server).ljust(15))

        if (self.preempt != -1):
            data += " s={}".format(str(self.preempt))
        print("".ljust(102, '-'))
        print(data)

    def calculate_ci(self):
        # Discard batch 0 (the warm-up batch)
        self.rt_mean.pop(0)
        self.nrt_mean.pop(0)
        self.rt_percentile.pop(0)
        self.nrt_percentile.pop(0)

        # 2.0086 is the Student-t critical value for a 95% confidence
        # interval with 50 degrees of freedom.
        # Response time for RT
        rt_mean = np.mean(self.rt_mean)
        rt_std = np.std(self.rt_mean)
        rtci_lb = rt_mean - (2.0086 * (rt_std / sqrt(len(self.rt_mean))))
        rtci_ub = rt_mean + (2.0086 * (rt_std / sqrt(len(self.rt_mean))))
        # 95th percentile for RT
        rt_percentile_mean = np.mean(self.rt_percentile)
        rt_percentile_std = np.std(self.rt_percentile)
        rtci_lb2 = rt_percentile_mean - (
            2.0086 * (rt_percentile_std / sqrt(len(self.rt_percentile))))
        rtci_ub2 = rt_percentile_mean + (
            2.0086 * (rt_percentile_std / sqrt(len(self.rt_percentile))))

        # Response time for nRT
        nrt_mean = np.mean(self.nrt_mean)
        nrt_std = np.std(self.nrt_mean)
        nrtci_lb = nrt_mean - (2.0086 * (nrt_std / sqrt(len(self.nrt_mean))))
        nrtci_ub = nrt_mean + (2.0086 * (nrt_std / sqrt(len(self.nrt_mean))))
        # 95th percentile for nRT
        nrt_percentile_mean = np.mean(self.nrt_percentile)
        nrt_percentile_std = np.std(self.nrt_percentile)
        nrtci_lb2 = nrt_percentile_mean - (
            2.0086 * (nrt_percentile_std / sqrt(len(self.nrt_percentile))))
        nrtci_ub2 = nrt_percentile_mean + (
            2.0086 * (nrt_percentile_std / sqrt(len(self.nrt_percentile))))

        # Calculate results for plotting graph
        self.results.nrt_miat.append(self.nrt_iat)

        self.results.rt_mean.append(round(rt_mean, 4))
        self.results.rt_percentile.append(round(rt_percentile_mean, 4))
        self.results.rtci.append((round(rtci_lb, 4), round(rtci_ub, 4)))
        self.results.percentile_rtci.append((round(rtci_lb2, 4), round(rtci_ub2, 4)))

        self.results.nrt_mean.append(round(nrt_mean, 4))
        self.results.nrt_percentile.append(round(nrt_percentile_mean, 4))
        self.results.nrtci.append((round(nrtci_lb, 4), round(nrtci_ub, 4)))
        self.results.percentile_nrtci.append((round(nrtci_lb2, 4), round(nrtci_ub2, 4)))

        # Print batch results
        print("\n\nMean Inter-arrival time of nRT message: {}".format(
            self.nrt_iat))
        print("Mean of RT response time: {:0.4f}".format(rt_mean))
        print("95th percentile of RT response time: {:0.4f}".format(
            rt_percentile_mean))
        print("Mean of nRT response time: {:0.4f}".format(nrt_mean))
        print("95th percentile of nRT response time: {:0.4f}".format(
            nrt_percentile_mean))
        print("RT Mean Confidence Interval: {:0.4f} - {:0.4f}".format(
            rtci_lb, rtci_ub))
        print("nRT Mean Confidence Interval: {:0.4f} - {:0.4f}".format(
            nrtci_lb, nrtci_ub))
        print("RT Percentile Confidence Interval: {:0.4f} - {:0.4f}".format(
            rtci_lb2, rtci_ub2))
        print("nRT Percentile Confidence Interval: {:0.4f} - {:0.4f}".format(
            nrtci_lb2, nrtci_ub2))

    def process(self):
        # Run the simulation
        while (True):
            if self.nrt == self.b and self.nnrt == self.b:  # Batch complete
                rt_mean = np.mean(self.rt_times)
                nrt_mean = np.mean(self.nrt_times)
                rt_percentile = np.percentile(self.rt_times, 95)
                nrt_percentile = np.percentile(self.nrt_times, 95)
                self.rt_mean.append(round(rt_mean, 4))
                self.nrt_mean.append(round(nrt_mean, 4))
                self.rt_percentile.append(round(rt_percentile, 4))
                self.nrt_percentile.append(round(nrt_percentile, 4))
                # Reset the RT and nRT buffers
                self.rt_times = []
                self.nrt_times = []
                self.nrt = self.nnrt = 0
                self.m -= 1
                if self.m == 0:
                    break

            clocks = []
            # Find which event occurs next
            if self.server == 0:
                clocks = [self.rtcl, self.nrtcl]
            else:
                clocks = [self.rtcl, self.nrtcl, self.scl]

            if self.rtcl == min(clocks):
                self.rt_arrival()
            elif self.nrtcl == min(clocks):
                self.nrt_arrival()
            else:
                self.service_complete()
        # Calculate confidence interval
        self.calculate_ci()

    def reset(self, no_batches):
        # Reset for new batch
        self.mc = 0
        self.rtcl = 3
        self.nrtcl = 5
        self.scl = 4
        self.server = 2
        self.current = Node(2, 0, 4)
        self.rt_queue = deque([])
        self.nrt_queue = deque([])
        self.preempt = -1
        self.nrt = 0
        self.nnrt = 0
        self.rt_times = []
        self.nrt_times = []
        self.rt_mean = []
        self.nrt_mean = []
        self.rt_percentile = []
        self.nrt_percentile = []
        self.m = no_batches

    def run(self):
        # Start Simulation
        self.get_inputs()
        no_batches = self.m

        while (self.nrt_iat <= max_nrt_miat):
            self.reset(no_batches)
            self.process()
            self.nrt_iat += nrt_miat_increments
        self.results.display()
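# --- Hedged usage sketch (not part of the original example) ---
# Node and Result come from elsewhere in the project; run() sweeps the nRT
# mean inter-arrival time using the two module-level settings below, whose
# values here are placeholders chosen only for illustration.
max_nrt_miat = 10.0
nrt_miat_increments = 0.5

if __name__ == '__main__':
    Simulation().run()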
Example #3
from os.path import abspath, exists
from time import time

import numpy as np

from data.tasks import MDS_task_generator
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
from util.results import Result

optimize = True

mds_home = '../datasets/MDS'
dataset_home = '../datasets/Webis-CLS-10'

nfolds = 5
outfile = './DCI.varpivot.dat'
if exists(outfile):
    rperf = Result.load(outfile, False)
else:
    rperf = Result(['dataset', 'task', 'method', 'fold', 'npivots', 'acc', 'dci_time', 'svm_time'])


pivot_range = [10,25,50,100,250,500,1000,1500,2000,2500,5000]

for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds):
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U,
                                         source.V, target.V,
                                         phi=1, cross=True)

    for npivots in pivot_range:
        for dcf in ['cosine','linear']:
            dci = DCI(dcf=dcf, unify=False, post='normal')
            acc, dci_time, svm_time, _ = DCIclassify(source, target, s_pivots[:npivots], t_pivots[:npivots], dci, optimize=True)
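            # Hedged continuation (the original snippet ends above): record the
            # scores in rperf using the columns it was declared with; the method
            # label 'DCI-<dcf>' is an assumed naming convention.
            rperf.add(dataset='MDS', task=taskname, method='DCI-' + dcf,
                      fold=fold, npivots=npivots, acc=acc,
                      dci_time=dci_time, svm_time=svm_time)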
Example #4
from os.path import join

import numpy as np

from util.file import *
from quantification.helpers import *
from util.plot_correction import plot_corr
from util.results import Result

VECTORSPATH = '../vectors'

sample_size = 200
samples_by_prevalence = 100

results_table = Result()


def add_results(results, dataset, method, approach):
    for metric, score in results.items():
        results_table.add(dataset=dataset,
                          method=method,
                          approach=approach,
                          metric=metric,
                          score=score)


for vectorset_name in list_dirs(VECTORSPATH):
    print(vectorset_name)
    vectorset = join(VECTORSPATH, vectorset_name)

    train_x = np.load(join(vectorset, 'train.vec.npy'))
    train_y = np.load(join(vectorset, 'train.y.npy'))
    train_y_pred = np.load(join(vectorset, 'train.y_pred.npy'))
    train_y_prob = np.load(join(vectorset, 'train.y_prob.npy'))
Example #5
from data.tasks_topics import Topic_task_generator
from model.dci import DCI
from experiments.common import DCIinduction, pivot_selection_timed
from util.results import Result

optimize = True
npivots = 1000
dcf = 'cosine'

twentynews_home = '../datasets/20news'
sraa_home = '../datasets/SRAA'
reuters_home = '../datasets/Reuters21578'

rperf = Result([
    'dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t'
])
for source, target, task, dataset in \
        Topic_task_generator(reuters_home=reuters_home, sraa_home=sraa_home, twenty_home=twentynews_home):

    s_pivots, t_pivots, pivot_time = pivot_selection_timed(npivots,
                                                           source.X,
                                                           source.y,
                                                           source.U,
                                                           target.U,
                                                           source.V,
                                                           target.V,
                                                           phi=1,
                                                           cross=True,
                                                           show=min(
                                                               10, npivots))
Example #6
from data.tasks import WebisCLS10_task_generator, WebisCLS10_crossdomain_crosslingual_task_generator
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
import os
from time import time
from util.results import Result

dcf = 'cosine'
npivots = 450

optimize = True
dataset_home = '../datasets/Webis-CLS-10'

rperf = Result([
    'dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t'
])
for source, target, oracle, taskname in WebisCLS10_crossdomain_crosslingual_task_generator(
        os.path.abspath(dataset_home)):

    # pivot selection
    tinit = time()
    s_pivots, t_pivots = pivot_selection(npivots,
                                         source.X,
                                         source.y,
                                         source.U,
                                         target.U,
                                         source.V,
                                         target.V,
                                         oracle=oracle,
                                         phi=30,
Example #7
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import LinearSVC
from data.tasks import WebisCLS10_task_generator
from data.domain import unify_feat_space
import os
from util.results import Result


parameters = {'C': [10 ** i for i in range(-5, 5)]}

dataset_home = '../datasets/Webis-CLS-10'

results = Result(['dataset', 'task', 'method', 'acc'])
for source, target, oracle, taskname in WebisCLS10_task_generator(os.path.abspath(dataset_home)):

    # upper
    svm = GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)
    source, target = unify_feat_space(source, target)
    acc = cross_val_score(svm, target.X, target.y, cv=5).mean()
    results.add(dataset='Webis-CLS-10', task=taskname, method='Upper', acc=acc)

    # lower
    svm = GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)
    svm.fit(source.X, source.y)
    yte_ = svm.predict(target.X)
    acc = (target.y == yte_).mean()
    results.add(dataset='Webis-CLS-10', task=taskname, method='Lower', acc=acc)

    results.pivot(grand_totals=True)
Example #8
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import LinearSVC
from data.domain import unify_feat_space
from data.tasks import MDS_task_generator, UpperMDS_task_generator
from os.path import abspath
from util.results import Result


dataset_home = '../datasets/MDS'
nfolds = 5
upper = {}
parameters = {'C': [10 ** i for i in range(-5, 5)]}

results = Result(['dataset', 'task', 'method', 'fold', 'acc'])
for domain in UpperMDS_task_generator(abspath(dataset_home)):
    svm = GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)
    upper[domain.domain] = cross_val_score(svm, domain.X, domain.y, cv=5).mean()

for source, target, fold, taskname in MDS_task_generator(abspath(dataset_home), nfolds=nfolds):
    svm = GridSearchCV(LinearSVC(), parameters, n_jobs=-1, verbose=1, cv=5)
    target_domain_name = target.domain
    source, target = unify_feat_space(source, target)
    svm.fit(source.X, source.y)
    yte_ = svm.predict(target.X)
    acc = (yte_ == target.y).mean()

    results.add(dataset='MDS', task=taskname, method='Lower', fold=fold, acc=acc)
    results.add(dataset='MDS', task=taskname, method='Upper', fold=fold, acc=upper[target_domain_name])

results.pivot(grand_totals=True)
Example #9
from sklearn.svm import LinearSVC
from data.domain import *
from sklearn.model_selection import cross_val_score
from data.tasks_topics import Topic_task_generator
from util.results import Result

twentynews_home = '../datasets/20news'
sraa_home = '../datasets/SRAA'
reuters_home = '../datasets/Reuters21578'

results = Result(['dataset', 'task', 'method', 'acc'])
for source, target, task, dataset in \
        Topic_task_generator(reuters_home=reuters_home, sraa_home=sraa_home, twenty_home=twentynews_home):
    source, target = unify_feat_space(source, target)

    svm = LinearSVC()
    svm.fit(source.X, source.y)
    yte_ = svm.predict(target.X)
    acc = (yte_ == target.y).mean()
    results.add(dataset=dataset, task=task, method='Lower', acc=acc)

    upper = cross_val_score(svm, target.X, target.y, cv=5).mean()
    results.add(dataset=dataset, task=task, method='Upper', acc=upper)

results.pivot(grand_totals=True)
Example #10
from os.path import abspath
from time import time

from data.tasks import MDS_task_generator
from experiments.common import DCIclassify
from model.dci import DCI
from model.pivotselection import pivot_selection
from util.results import Result

dcf = 'cosine'
npivots = 1000

optimize = True
mds_home = '../datasets/MDS'

nfolds = 5
rperf = Result([
    'dataset', 'task', 'method', 'fold', 'acc', 'pivot_t', 'dci_t', 'svm_t',
    'test_t'
])

for source, target, fold, taskname in MDS_task_generator(abspath(mds_home),
                                                         nfolds=nfolds):

    tinit = time()
    s_pivots, t_pivots = pivot_selection(npivots,
                                         source.X,
                                         source.y,
                                         source.U,
                                         target.U,
                                         source.V,
                                         target.V,
                                         phi=1,
                                         show=min(10, npivots),
Example #11
import pickle
import scipy
import torch
from os.path import join
from sklearn.model_selection import train_test_split, StratifiedKFold
from tqdm import tqdm

from util.file import *
from util.results import Result

cuda = torch.device('cuda')

VECTORSPATH = '../vectors'

sample_size = 200
samples_by_prevalence = 100

results_table = Result()


def add_results(results, dataset, method, approach):
    for metric, score in results.items():
        results_table.add(dataset=dataset,
                          method=method,
                          approach=approach,
                          metric=metric,
                          score=score)


for iter in range(10):
    hidden = 64
    drop_p = 0.0
    layers = 2
Example #12
from data.tasks import WebisCLS10_task_generator
from experiments.common import pivot_selection_timed, DCItransduction
from model.dci import DCI
import os
from util.results import Result


optimize = False
transductive = True
dcf = 'cosine'
npivots = 450
svmlight_home = '../../svm_light'
dataset_home = '../datasets/Webis-CLS-10'

methodname = ('T' if transductive else 'I') + 'DCI'

rperf = Result(['dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t'])
for source, target, oracle, taskname in WebisCLS10_task_generator(os.path.abspath(dataset_home)):

    s_pivots, t_pivots, pivot_time = pivot_selection_timed(
        npivots, source.X, source.y, source.U, target.U, source.V, target.V,
        oracle=oracle, phi=30, show=min(20, npivots), cross=False
    )

    dci = DCI(dcf=dcf, unify=True, post='normal')
    acc, dci_time, svm_time, test_time = DCItransduction(
        source, target, s_pivots, t_pivots, dci, svmlight_home, optimize=optimize, transductive=transductive
    )

    rperf.add(dataset='Webis-CLS-10', task=taskname, method=methodname,
              acc=acc,
              pivot_t=pivot_time, dci_t=dci_time, svm_t=svm_time, test_t=test_time)
Example #13
from sklearn.model_selection import cross_val_score
from data.tasks import MDS_task_generator, UpperMDS_task_generator
from data.domain import *
from os.path import abspath
from classification.svmlight import SVMlight
from util.results import Result


dataset_home = '../datasets/MDS'
svmlight_home = '../../svm_light'

nfolds = 5

results = Result(['dataset', 'task', 'method', 'acc'])
for domain in UpperMDS_task_generator(abspath(dataset_home)):
    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None)
    score = cross_val_score(isvm, domain.X, domain.y, cv=nfolds).mean()
    results.add(dataset='MDS', task=domain.domain, method='UPPER', acc=score)
results.pivot(grand_totals=True)

results = Result(['dataset', 'task', 'method', 'fold', 'acc'])
for source, target, fold, task in MDS_task_generator(abspath(dataset_home), nfolds=nfolds):
    source_name = source.domain
    target_name = target.domain
    source, target = unify_feat_space(source, target)

    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None).fit(source.X, source.y)
    tsvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=target.X).fit(source.X, source.y)

    yte_ = isvm.predict(target.X)
    tyte_ = tsvm.transduced_labels
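    # Hedged continuation (the original snippet ends above): score the inductive
    # and the transductive SVM the same way the earlier MDS examples do; the
    # method labels 'ISVM' and 'TSVM' are assumed, not taken from the source.
    iacc = (yte_ == target.y).mean()
    tacc = (tyte_ == target.y).mean()
    results.add(dataset='MDS', task=task, method='ISVM', fold=fold, acc=iacc)
    results.add(dataset='MDS', task=task, method='TSVM', fold=fold, acc=tacc)

results.pivot(grand_totals=True)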