Python ProgressBar примеры, MLIB.utils.ProgBar.ProgressBar Python примеры использования

Пример #1

0

Показать файл

Файл: gaussian_process.py Проект: Bollegala/SST

 def train(self, train_vects):
     """
     Build the kernel (covariance) matrix over the training instances
     and store its inverse.

     train_vects -- mapping that provides at least the keys "vects" and
                    "featIDs"; it is also passed to get_train_data(),
                    whose two return values are stored in self.D and
                    self.t.

     Sets self.D, self.t, self.n (len of train_vects["vects"]), self.m
     (len of train_vects["featIDs"]) and self.Cinv. Returns None.
     Progress is reported on the terminal only when self.verbose is true.
     """
     # NOTE: print(...) with one parenthesised argument prints the same
     # text under the Python 2 print statement and the Python 3 function.
     # load the train data to a matrix.
     if self.verbose:
         print("Loading the training data to memory...")
     (self.D, self.t) = get_train_data(train_vects)
     # create co-variance matrix C.
     self.n = len(train_vects["vects"])
     self.m = len(train_vects["featIDs"])
     progress = None
     if self.verbose:
         print("Creating the covariance matrix...")
         term = TerminalController()
         progress = ProgressBar(term, "Train instances = %d" % self.n)
     C = zeros((self.n, self.n))
     for i in range(self.n):
         if progress is not None:
             progress.update(
                 float(i + 1) / self.n,
                 "Processing instance no. %d" % (i + 1))
         # row i does not change while j varies, so fetch it once.
         x_i = self.D[i, :]
         # only the upper triangle is evaluated; each value is mirrored.
         for j in range(i, self.n):
             val = self.kernel.value(x_i, self.D[j, :])
             if i == j:
                 # diagonal entries get an extra 1/beta term.
                 val += 1.0 / float(self.beta)
             C[i, j] = val
             C[j, i] = val
     # compute the inverse.
     if self.verbose:
         print("Computing the inverse of the matrix...")
     self.Cinv = inv(C)

Пример #2

0

Показать файл

Файл: gaussian_process.py Проект: Bollegala/MLIB

 def train(self, train_vects):
     """
     Compute and store the inverse of the kernel (covariance) matrix of
     the training instances.

     train_vects -- mapping that provides at least the keys "vects" and
                    "featIDs"; it is also passed to get_train_data(),
                    whose two return values are stored in self.D and
                    self.t.

     Side effects: assigns self.D, self.t, self.n (number of entries in
     train_vects["vects"]), self.m (number of entries in
     train_vects["featIDs"]) and self.Cinv. Returns None. Terminal
     output is produced only when self.verbose is true.
     """
     verbose = self.verbose
     # NOTE: print(...) with one parenthesised argument prints the same
     # text under the Python 2 print statement and the Python 3 function.
     # load the train data to a matrix.
     if verbose:
         print("Loading the training data to memory...")
     (self.D, self.t) = get_train_data(train_vects)
     # create co-variance matrix C.
     self.n = len(train_vects["vects"])
     self.m = len(train_vects["featIDs"])
     if verbose:
         print("Creating the covariance matrix...")
         term = TerminalController()
         progress = ProgressBar(term,
                                "Train instances = %d" % self.n)
     C = zeros((self.n, self.n))
     for i in range(self.n):
         if verbose:
             progress.update(float(i + 1) / self.n,
                             "Processing instance no. %d" % (i + 1))
         # the i-th instance is invariant in the inner loop; fetch once.
         x_i = self.D[i, :]
         # fill the upper triangle and mirror it into the lower one.
         for j in range(i, self.n):
             x_j = self.D[j, :]
             val = self.kernel.value(x_i, x_j)
             if i == j:
                 # 1/beta is added on the diagonal only.
                 val += 1.0 / float(self.beta)
             C[i, j] = val
             C[j, i] = val
     # compute the inverse.
     if verbose:
         print("Computing the inverse of the matrix...")
     self.Cinv = inv(C)

Пример #3

0

Показать файл

Файл: estimate.py Проект: dainis-boumber/MLIB

def write_distribution(M, result_fname):
    """
    Compute the row similarity distribution.
    To compute column similarity distribution, transpose
    the matrix first.

    M            -- matrix object; get_row_id_list() and shape() are
                    called on it and it is handed to every worker.
    result_fname -- path of the output file, opened in "w" mode.

    Every row id of M is queued, NO_OF_PROCESSORS worker processes
    running do_work are started, and the call blocks until all of them
    have finished. Returns None.
    """
    work_queue = Queue()
    lock = Lock()
    # the with-block guarantees the file is closed even if queueing,
    # process start-up or joining raises.
    with open(result_fname, "w") as distFile:
        row_ids = M.get_row_id_list()
        no_rows, no_cols = M.shape()
        for row_id in row_ids:
            work_queue.put(row_id)
        term = TerminalController()
        progress = ProgressBar(term, "Total rows = %d, columns = %d"
                               % (no_rows, no_cols))
        # compute similarity.
        procs = [
            Process(target=do_work,
                    args=(work_queue, lock, M, no_rows, distFile, progress))
            for _ in range(NO_OF_PROCESSORS)
        ]
        for p in procs:
            p.start()
        # wait for every worker before the file is closed.
        for p in procs:
            p.join()

Пример #4

0

Показать файл

def write_distribution(M, result_fname, DIelements):
    """
    Compute the row similarity distribution.
    To compute column similarity distribution, transpose
    the matrix first. if Domain independent row elements are
    given (DIelements), then compute the similarity between
    those elements and all the row elements.

    M            -- matrix object; row_exists() and shape() are called
                    on it and it is handed to every worker.
    result_fname -- path of the output file, opened in "w" mode.
    DIelements   -- iterable of row ids; only ids for which
                    M.row_exists() is true are queued.

    NO_OF_PROCESSORS worker processes running do_work are started and
    the call blocks until all of them have finished. Returns None.
    """
    work_queue = Queue()
    lock = Lock()
    # the with-block guarantees the file is closed even if queueing,
    # process start-up or joining raises.
    with open(result_fname, "w") as distFile:
        row_ids = [rowid for rowid in DIelements if M.row_exists(rowid)]
        (no_rows, no_cols) = M.shape()
        for rowid in row_ids:
            work_queue.put(rowid)
        term = TerminalController()
        progress = ProgressBar(term, "Total rows = %d, columns = %d"
                               % (no_rows, no_cols))
        # compute similarity.
        # workers receive the number of queued rows, not no_rows.
        procs = [
            Process(target=do_work,
                    args=(work_queue, lock, M, len(row_ids), distFile,
                          progress))
            for _ in range(NO_OF_PROCESSORS)
        ]
        for p in procs:
            p.start()
        # wait for every worker before the file is closed.
        for p in procs:
            p.join()

Пример #5

0

Показать файл

Файл: seqclust.py Проект: Bollegala/MLIB

 def cluster(self, m, theta):
     """
     Sequentially cluster the rows of m.

     m     -- matrix object; get_row_id_list(), get_row() and
              L2_normalize_rows() are called on it. Rows are iterated
              and indexed like mappings. Note that the rows of m are
              L2-normalised as a side effect.
     theta -- threshold; a row joins the most similar existing cluster
              only when that similarity (self.sim) is strictly greater
              than theta, otherwise it starts a new cluster.

     Returns the list of SEQ_CLUST_DATA clusters in creation order.
     """
     #first sort patterns according to the total frequency
     #of all word-pairs in which they appear.
     pats = [] # (pat_id, total_frequency_in_wpairs)
     for pat in m.get_row_id_list():
         row = m.get_row(pat)
         pats.append((pat, sum(row[k] for k in row)))
     N = len(pats)
     pats.sort(self.patsort)
     #initialize clusters.
     clusts = []
     count = 0
     m.L2_normalize_rows()
     term = TerminalController()
     progress = ProgressBar(term, "Clustering total rows = %d" % N)
     for (pat, total) in pats:
         count += 1
         maxsim = 0
         maxclust = None
         #the row being placed is the same for every candidate
         #cluster, so look it up once.
         v = m.get_row(pat)
         for c in clusts:
             s = self.sim(c, v)
             if s > maxsim:
                 maxsim = s
                 maxclust = c
         #maxclust stays None when no cluster scored above 0; with a
         #negative theta that must start a new cluster, not merge.
         if maxclust is not None and maxsim > theta:
             #%s for the row id, as in the NEW message, so that
             #non-integer ids do not raise here.
             progress.update(float(count) / N,
                             "MERGED %d: row = %s freq = %d clusts = %d"
                             % (count, pat, total, len(clusts)))
             maxclust.merge(pat, m.get_row(pat))
         else:
             progress.update(float(count) / N,
                             "   NEW %d: %s freq = %d clusts = %d"
                             % (count, pat, total, len(clusts)))
             clusts.append(SEQ_CLUST_DATA(pat, m.get_row(pat)))
     return clusts

Пример #6

0

Показать файл

 def cluster(self, m, theta):
     """
     Greedy single-pass clustering of the rows of m.

     m     -- matrix object; get_row_id_list(), get_row() and
              L2_normalize_rows() are called on it. Rows are iterated
              and indexed like mappings. The rows of m are
              L2-normalised as a side effect of this call.
     theta -- threshold; a row is merged into its most similar cluster
              (according to self.sim) only when that similarity is
              strictly greater than theta, otherwise a new cluster is
              created for it.

     Returns the list of SEQ_CLUST_DATA clusters in creation order.
     """
     #first sort patterns according to the total frequency
     #of all word-pairs in which they appear.
     pats = []  # (pat_id, total_frequency_in_wpairs)
     for pat in m.get_row_id_list():
         row = m.get_row(pat)
         pats.append((pat, sum(row[k] for k in row)))
     N = len(pats)
     pats.sort(self.patsort)
     #initialize clusters.
     clusts = []
     m.L2_normalize_rows()
     term = TerminalController()
     progress = ProgressBar(term, "Clustering total rows = %d" % N)
     count = 0
     for (pat, total) in pats:
         count += 1
         #fetch the row once instead of once per candidate cluster.
         v = m.get_row(pat)
         maxsim = 0
         maxclust = None
         for c in clusts:
             s = self.sim(c, v)
             if s > maxsim:
                 maxsim = s
                 maxclust = c
         #no candidate scored above 0 -> maxclust is None; guard so a
         #negative theta creates a new cluster instead of failing.
         merged = maxclust is not None and maxsim > theta
         if merged:
             #row id formatted with %s (like the NEW message) so that
             #non-integer ids are accepted.
             msg = ("MERGED %d: row = %s freq = %d clusts = %d"
                    % (count, pat, total, len(clusts)))
         else:
             msg = ("   NEW %d: %s freq = %d clusts = %d"
                    % (count, pat, total, len(clusts)))
         progress.update(float(count) / N, msg)
         if merged:
             maxclust.merge(pat, m.get_row(pat))
         else:
             clusts.append(SEQ_CLUST_DATA(pat, m.get_row(pat)))
     return clusts

Пример #7

0

Показать файл

Файл: seqcoclust.py Проект: Bollegala/SST

 def coclustering(self, M, theta, phi):
     """
     Implements sequential co-clustering.
     (alternation variant)

     M     -- matrix object; rows/columns are read through
              get_row()/get_column() and merged in place through
              M.merge("ROWS"|"COLUMNS", cluster_id, member_id).
     theta -- column threshold: a column joins its most similar
              cluster only when self.cosine is strictly greater than
              theta.
     phi   -- row threshold, used the same way for rows.

     Columns and rows are each ordered by sorting (id, sum) pairs with
     self.patsort, then consumed alternately (one column, one row)
     until both are exhausted. Candidate clusters come from
     self.get_clusters() and the indexes are maintained through
     self.update_index().

     Returns (rowclusts, colclusts): two dicts mapping the id of the
     element that started a cluster to the list of its member ids.
     Progress goes to stdout when self.VERBOSE is true, otherwise to a
     terminal progress bar.
     """
     # Initialization. sorting row counts.
     columnIndex = {}
     rowIndex = {}
     rows = [(rowid, M.get_row_sum(rowid))
             for rowid in M.get_row_id_list()]
     rows.sort(self.patsort)
     no_rows = len(rows)
     # sorting column counts.
     cols = [(colid, M.get_column_sum(colid))
             for colid in M.get_column_id_list()]
     cols.sort(self.patsort)
     no_cols = len(cols)
     colclusts = {}
     rowclusts = {}
     progress = None
     if not self.VERBOSE:
         term = TerminalController()
         progress = ProgressBar(term,
                                "Clustering rows = %d, columns = %d" %
                                (no_rows, no_cols))
     total = no_rows + no_cols
     count = 0

     def report(axis, ident, action, sim, done):
         # Single place for all progress messages, so every one of them
         # ends with the same "[done/total]" suffix.
         stats = (axis, ident, action, sim,
                  len(rowclusts), len(colclusts), done, total)
         if self.VERBOSE:
             print("%s\t%d\t%s\tSIM=%f\tTotal=(%d,%d) [%d/%d]" % stats)
         else:
             progress.update(float(done) / total,
                             "%s %d %s SIM=%f Total=(%d,%d) [%d/%d]"
                             % stats)

     # start alternative clustering.
     # positions are advanced instead of deleting the list heads, which
     # would shift the whole list on every step.
     col_pos = 0
     row_pos = 0
     while col_pos < no_cols or row_pos < no_rows:
         if col_pos < no_cols:
             # column clustering.
             count += 1
             current_column = cols[col_pos][0]
             col_pos += 1
             theta_max = 0
             # None means "no candidate scored above 0".
             max_col_clust = None
             column_vect = M.get_column(current_column)
             validClusts = self.get_clusters(rowIndex, column_vect)
             for c in validClusts:
                 s = self.cosine(column_vect, M.get_column(c))
                 if s > theta_max:
                     theta_max = s
                     max_col_clust = c
             # the None check only matters for a negative theta, where
             # a merge without any candidate must not be attempted.
             if max_col_clust is not None and theta_max > theta:
                 colclusts[max_col_clust].append(current_column)
                 self.update_index(rowIndex, M.get_column(current_column),
                                   max_col_clust)
                 M.merge("COLUMNS", max_col_clust, current_column)
                 report("COL", current_column, "MRG", theta_max, count)
             else:
                 colclusts[current_column] = [current_column]
                 self.update_index(rowIndex, M.get_column(current_column),
                                   current_column)
                 report("COL", current_column, "NEW", theta_max, count)
         if row_pos < no_rows:
             # row clustering.
             count += 1
             current_row = rows[row_pos][0]
             row_pos += 1
             phi_max = 0
             # None means "no candidate scored above 0".
             max_row_clust = None
             row_vect = M.get_row(current_row)
             validClusts = self.get_clusters(columnIndex, row_vect)
             for c in validClusts:
                 s = self.cosine(row_vect, M.get_row(c))
                 if s > phi_max:
                     phi_max = s
                     max_row_clust = c
             if max_row_clust is not None and phi_max > phi:
                 rowclusts[max_row_clust].append(current_row)
                 self.update_index(columnIndex, M.get_row(current_row),
                                   max_row_clust)
                 M.merge("ROWS", max_row_clust, current_row)
                 report("ROW", current_row, "MRG", phi_max, count)
             else:
                 rowclusts[current_row] = [current_row]
                 self.update_index(columnIndex, M.get_row(current_row),
                                   current_row)
                 report("ROW", current_row, "NEW", phi_max, count)
     # Final steps.
     return (rowclusts, colclusts)

Python ProgressBar примеры использования