Example #1
def get_biggest_cluster(xi, yi, yli):
    clusters = DBSCAN(15).fit_predict(yli)
    hist, bins = np.histogram(clusters)
    maxi = np.argmax(hist)
    xi = list(compress(xi, clusters == CornerDetector.get_int_between(bins[maxi], bins[maxi + 1])))
    yi = list(compress(yi, clusters == CornerDetector.get_int_between(bins[maxi], bins[maxi + 1])))
    return xi, yi
Example #2
def create_weight_matrix(userInfo,stats):
    """
    The basic idea used for recommendation is a kind of content based recommendation per user.
    It involves creating a weight matrix (user across coffee properties). The weights are basically
    estimates of ratings by the user depending on the properties of the coffee.
    The individual weights are calculated using the average rating given by this
    user to coffees containing each of the 5 different properties.
    """
    wm = {}
    properties = ["Decaf","Organic","Fair Trade"]
    for currUserId in stats["ids"]:
        wm[currUserId]={}
        for p in properties:
            userInfo[currUserId][p] = {}
            # boolean mask over the current user's rated coffees: does the coffee
            # carry the decaf, organic or fair-trade property?
            has_property = [p in coffee for coffee in userInfo[currUserId]["coffees"]]
            userInfo[currUserId][p]["coffees"] = list(itertools.compress(userInfo[currUserId]["coffees"], has_property))
            # the mask is index-aligned with "ratings", so this selects the ratings of exactly those coffees
            userInfo[currUserId][p]["rating"] = list(itertools.compress(userInfo[currUserId]["ratings"], has_property))
            #computes the average coffee rating by the user for a coffee containing the property
            wm[currUserId][p] = sum(userInfo[currUserId][p]["rating"])/len(userInfo[currUserId][p]["rating"])

        for attribute in ("Adjective","Country"):
            wm[currUserId][attribute] = {}
            for k,v in stats[attribute].items():
                # boolean mask over this user's coffees for the current adjective/country value
                has_k = [k in coffee for coffee in userInfo[currUserId]["coffees"]]
                curList = list(itertools.compress(userInfo[currUserId]["coffees"], has_k))
                userInfo[currUserId][k] = {}
                if(len(curList) > 0):
                    userInfo[currUserId][k]["coffees"] = curList
                    # again, the mask is index-aligned with "ratings"
                    userInfo[currUserId][k]["rating"] = list(itertools.compress(userInfo[currUserId]["ratings"], has_k))
                    wm[currUserId][attribute][k] = sum(userInfo[currUserId][k]["rating"])/len(userInfo[currUserId][k]["rating"])
                else:
                    wm[currUserId][attribute][k] = 0
    return wm
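The selection step above only works because the selector handed to compress is a boolean mask that is index-aligned with the ratings list. A minimal, self-contained sketch of that pattern with made-up coffee data (not part of the original module):

from itertools import compress

coffees = [["Decaf", "Colombia"], ["Organic", "Kenya"], ["Decaf", "Fair Trade"]]
ratings = [4, 5, 3]
has_decaf = ["Decaf" in c for c in coffees]          # [True, False, True]
decaf_ratings = list(compress(ratings, has_decaf))   # [4, 3]
print(sum(decaf_ratings) / len(decaf_ratings))       # 3.5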
Example #3
def load_csv(path, sample=10, only_title=False, include=(), exclude=()):
  from itertools import compress
  import csv

  with open(path) as f:
    titles = f.readline().strip().split(',')

  if include:
    column_compress = [_start_or_end_with(title, include) for title in titles]
  else:
    column_compress = [True] * len(titles)
  if exclude:
    column_compress = [not _start_or_end_with(title, exclude) and tb for title, tb in zip(titles, column_compress)]

  if only_title:
    return list(compress(titles, column_compress))

  with open(path) as f:
    lines = csv.reader(f)
    next(lines)
    result = []
    for i, line in enumerate(lines, 1):
      if sample and i > sample:
        break
      result.append([_load_csv_value_convert(x) for x in compress(line, column_compress)])

  return result
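A hypothetical illustration of how the column mask is built and applied; `_start_or_end_with` is not shown in the snippet, so `str.startswith` stands in for it here:

from itertools import compress

titles = ['id', 'price_usd', 'name', 'cost_usd']            # made-up header row
include = ('price', 'cost')
column_compress = [t.startswith(include) for t in titles]   # stand-in for _start_or_end_with
print(list(compress(titles, column_compress)))              # ['price_usd', 'cost_usd']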
Example #4
    def _import_swiss_manager(self, file_name, elo_default, draw_character):
        with open(file_name) as file:
            file_iter, index_name, index_elo, index_points, index_rounds = self._read_first_line(file)

            # Read the rest of the file and split the fields in the list
            lines = [line.split(";") for line in file_iter]

            # Fill in the information
            self._names = [line[index_name] for line in lines]
            # First, determine which players haven't played any round
            self._opponents = [Tournament._calculate_opponents(line, index_rounds) for line in lines]
            self._number_of_opponents = [len(opps) for opps in self._opponents]
            self._did_not_play = [name for name, opps in zip(self._names, self._number_of_opponents) if opps == 0]

            # Now, save only the relevant players
            self._names = list(compress(self._names, self._number_of_opponents))
            self._elos = [
                Tournament._calculate_elo(line[index_elo], elo_default)
                for line in compress(lines, self._number_of_opponents)
            ]
            self._points = [
                Tournament._calculate_points(line[index_points], draw_character=draw_character)
                for line in compress(lines, self._number_of_opponents)
            ]
            self._played_points = [
                Tournament._calculate_played_points(line, index_rounds, draw_character)
                for line in compress(lines, self._number_of_opponents)
            ]
            self._number_of_opponents = list(compress(self._number_of_opponents, self._number_of_opponents))
            self._number_of_rounds = len(index_rounds)
Example #5
def plot_conditions(eeg, conditions):
    eeg1_full = np.asarray(list(compress(eeg, conditions == 0)))
    eeg2_full = np.asarray(list(compress(eeg, conditions == 1)))

    # draw select trials
    for i in xrange(10):
        plt.subplot(1, 10, i + 1)
        plt.pcolor(eeg1_full[i], cmap=plt.cm.Blues)
    plt.show()

    eeg1 = np.mean(eeg1_full, axis=0)
    eeg2 = np.mean(eeg2_full, axis=0)

    def _plot_heatmap(data):
        return plt.pcolor(data, cmap=plt.cm.Blues)

    # draw between class difference
    plt.subplot(1, 3, 1)
    _plot_heatmap(eeg1)
    plt.subplot(1, 3, 2)
    _plot_heatmap(eeg2)
    plt.subplot(1, 3, 3)
    _plot_heatmap(eeg1-eeg2)
    plt.show()

    # draw within class difference
    plt.subplot(1, 4, 1)
    _plot_heatmap(np.mean(eeg1_full[:(len(eeg1) / 2)], axis=0))
    plt.subplot(1, 4, 2)
    _plot_heatmap(np.mean(eeg1_full[(len(eeg1) / 2):], axis=0))
    plt.subplot(1, 4, 3)
    _plot_heatmap(np.mean(eeg2_full[:(len(eeg2) / 2)], axis=0))
    plt.subplot(1, 4, 4)
    _plot_heatmap(np.mean(eeg2_full[(len(eeg2) / 2):], axis=0))
    plt.show()
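Note that compress accepts any iterable of truth values as the selector, which is why the NumPy boolean array produced by `conditions == 0` works above; a minimal check of that behaviour:

import numpy as np
from itertools import compress

eeg = ['trial0', 'trial1', 'trial2']         # stand-ins for per-trial arrays
conditions = np.array([0, 1, 0])
print(list(compress(eeg, conditions == 0)))  # ['trial0', 'trial2']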
Example #6
def single_var_graphs(data):
    """
    This function is for visualizing single variables against labels

    Args:
        data: a list of dictionaries
    """
    # first remove ORF
    for e in data:
        e.pop(ORF, None)

    keys = data[0].keys()
    labels = [int(e[ESSENTIAL]) for e in data]
    not_labels = [0 if l else 1 for l in labels]
    for x in keys[:5]:
        X = [float(e[x]) for e in data]
        xPos = list(compress(X, labels))
        xNeg = list(compress(X, not_labels))

        title = x + ' Essentiality'
        axes = plt.gca()
        axes.set_ylim([-1, 2])
        pos = plt.scatter(xPos, [1] * len(xPos), c='r', alpha=0.5)
        neg = plt.scatter(xNeg, [0] * len(xNeg), c='g', alpha=0.5)
        plt.title(title)
        plt.xlabel(x)
        plt.ylabel('Essentiality')
        plt.legend((pos, neg), ('Essential', 'Non-Essential'), scatterpoints=1)
        plt.show()
Example #7
    def remove_empty_features(self, threshold=10):
        """Remove features with fewer than threshold non-zero entries, save in CSV file."""
        f = open(self.data_file_name)
        self.m = len(f.readline().strip().split("\t"))
        print "Features:", self.m

        f.seek(0)
        dcount = collections.Counter() # non-empty data entries per feature
        for line in f:
            ind = [i for i, x in enumerate(line.strip().split("\t")) if x!="0"]
            dcount.update(ind)

        useful_features = set(k for k, v in dcount.items() if v >= threshold)
        self.m_red = len(useful_features)
        print "Columns with at least %d entries: %d (%4.2f%%)" % (threshold, self.m_red, 100*self.m_red/self.m)

        # column selection on training data set
        f.seek(0)
        selection = [x in useful_features for x in xrange(self.m)]
        fout = file(self.filtered_data_file_name, "w")
        for line in f:
            fout.write("\t".join(itertools.compress(line.strip().split("\t"), selection)) + "\n")
        fout.close()

        # column selection on test data set
        f = open(self.test_file_name)
        fout = file(self.filtered_test_file_name, "w")
        for line in f:
            fout.write("\t".join(itertools.compress(line.strip().split("\t"), selection)) + "\n")
        fout.close()
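A toy illustration of the per-line column filtering used above, with a made-up four-column row and the selection mask built the same way:

from itertools import compress

useful_features = {0, 2}
selection = [i in useful_features for i in range(4)]   # [True, False, True, False]
line = "5\t0\t7\t0"
print("\t".join(compress(line.strip().split("\t"), selection)))  # keeps the 1st and 3rd fields: 5 and 7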
Example #8
def pairwise_graphs(data):
    """
    This function produces graphs for each pair of features given a list of dictionaries
    A LOT of graphs are produced. I recommend constraining the keys in the loops
    if using show()

    Args:
        data: a list of dictionaries
    """
    # first remove ORF
    for e in data:
        e.pop(ORF, None)

    keys = data[0].keys()
    labels = [int(e[ESSENTIAL]) for e in data]
    notLabels = [0 if l else 1 for l in labels]
    i = 0
    for x in keys:
        X = [float(e[x]) for e in data]
        x_pos = list(compress(X, labels))
        x_neg = list(compress(X, notLabels))
        i += 1
        for y in keys[i:]:
            Y = [float(e[y]) for e in data]
            y_pos = list(compress(Y, labels))
            y_neg = list(compress(Y, notLabels))
            pos = plt.scatter(x_pos, y_pos, c='r', alpha=0.5)
            neg = plt.scatter(x_neg, y_neg, c='g', alpha=0.5)
            title = x + ' vs ' + y
            plt.title(title)
            plt.xlabel(x)
            plt.ylabel(y)
            plt.legend((pos, neg), ('Essential', 'Non-Essential'), scatterpoints=1)
            # plt.show()
            plt.savefig('../data/graphs/' + title + '.png')
Example #9
def getUnobservedInts(fInName, desiredResponsesMask, boolLen, str4true, subsetMask=None, subsetResponses=None):
    
    # Open the csv and get our row iterator
    fIn = open(fInName)
    csv_f = csv.reader(fIn)

    # Skip the header row
    next(csv_f) 

    # Start with all possible responses as integers set ...
    unobservedInts = set(range(2**boolLen))
    # ... and loop through our rows, discarding the observeds
    for row in csv_f:

        if subsetMask == None:

            i = int(''.join(['1' if i in str4true else '0' for i in compress(row,desiredResponsesMask)]), 2)
            unobservedInts.discard(i)

        else:

            if tuple(compress(row, subsetMask)) == subsetResponses:

                i = int(''.join(['1' if i in str4true else '0' for i in compress(row,desiredResponsesMask)]), 2)
                unobservedInts.discard(i)

    fIn.close()

    return unobservedInts
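A small illustration of the encoding step inside the loop above: the masked responses are mapped to a bit string and then to an integer (all values here are made up):

from itertools import compress

row = ['yes', 'no', 'yes', 'n/a']
desiredResponsesMask = [1, 1, 1, 0]
str4true = {'yes'}
bits = ''.join(['1' if v in str4true else '0' for v in compress(row, desiredResponsesMask)])
print(bits, int(bits, 2))   # 101 5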
Example #10
    def compute(cls, keys, variables, function, parameters, parallel=None,
                ipython_profile=None, group_name=None):
        """
        Compute profiles by applying the parameters to the function in parallel.

        """
        assert len(keys) == len(parameters)
        njobs = len(parameters)
        parallel = parallel or ParallelProcessor.create_from_legacy(ipython_profile)
        generator = parallel.view('profiles.compute').imap(function, parameters)
        try:
            import progressbar
            progress = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ',
                                                        progressbar.Bar(), ' ', 
                                                        progressbar.Counter(), '/', 
                                                        str(njobs), ' ',
                                                        progressbar.ETA()],
                                               maxval=njobs)
            data = list(progress(generator))
        except ImportError:
            data = list(generator)

        for i, (p, r) in enumerate(zip(parameters, data)):
            if r is None:
                logger.info('Retrying failed computation locally')
                data[i] = function(p)

        rowmask = [(l != None) and all(~np.isnan(l)) for l in data]
        import itertools
        data = list(itertools.compress(data, rowmask))
        keys = list(itertools.compress(keys, rowmask))

        return cls(keys, data, variables, group_name=group_name)
Example #11
def filterByExpression(textList, inputList, prep = True, method = 'AND', index = False, invert = False):
    #filter textList by expressions in inputList
    import itertools
    import re

    if isinstance(inputList, str): #if a string is passed in instead of a list, make it a list
        inputList = [inputList]
    
    if prep:
        expressionList = prepExpressionList(inputList)
    else:
        expressionList = inputList

    matchObjLists = []
    for expression in expressionList:
        matchObjList = []
        for textString in textList:
            matchObjList.append(re.search(expression, textString))
        matchObjLists.append(matchObjList)
    condition = filterByHelper(matchObjLists, method = method)

    if invert:
        condition = [not boolVal for boolVal in condition]

    if index is False:
        return list(itertools.compress(textList, condition))
    else:
        return list(itertools.compress(enumerate(textList), condition))
Example #12
    def apply_filter_by_instances(self, context, classifier_name, all_elements, temp_feat_list):
        """
        Feat_filter indicates the features that must be kept from the pattern file, depending on the features_names
        array.
        :param context:
        :param classifier_name:
        :param all_elements:
        :param temp_feat_list:
        :return:
        """
        feat_filter = self.build_features_filter(len(context["classifiers"][classifier_name]["classes_names"]),
                                                 context["classifiers"][classifier_name]["features_names"],
                                                 temp_feat_list)

        all_elements = np.asarray([list(itertools.compress(x, feat_filter)) for x in all_elements])
        temp_feat_list = list(itertools.compress(temp_feat_list, feat_filter))
        #all_elements contains now the same features than features_name from the classifier context, but unordered

        features_names = context["classifiers"][classifier_name]["features_names"]
        while temp_feat_list != features_names:
            for i, feature_true, feature_all_elements in zip(range(len(features_names)), features_names, temp_feat_list):
                if feature_true != feature_all_elements:
                    temp = copy.deepcopy(all_elements[:, i])
                    all_elements[:, i] = all_elements[:, temp_feat_list.index(feature_true)]
                    all_elements[:, temp_feat_list.index(feature_true)] = temp
                    temp_feature = copy.deepcopy(temp_feat_list[i])
                    old_position = temp_feat_list.index(feature_true)
                    temp_feat_list[i] = feature_true
                    temp_feat_list[old_position] = temp_feature
                    break

        return all_elements
Example #13
    def subset_cells(self, keep_cells):
        """
        Write a file reduced to just the given cells
        """

        keep_cells = set(keep_cells)
        subset_file_name = self.tag_file_name(c_SUBSET_POSTFIX)
        if subset_file_name is None:
            return(None)

        with self.get_write_handle(subset_file_name) as file_writer:
            csv_writer = csv.writer(file_writer, delimiter=self.delimiter)
            check_handle = self.csv_handle
            keep_cells.add(c_EXPRESSION_00_ELEMENT)
            header = next(check_handle)
            row_1 = next(check_handle)
            if (len(header) == (len(row_1) - 1)) and (c_EXPRESSION_00_ELEMENT not in header):
                header = [c_EXPRESSION_00_ELEMENT] + header
            header_index = [cell in keep_cells for cell in header]
            # Need to add the header rows
            csv_writer.writerow(list(itertools.compress(header,header_index)))
            csv_writer.writerow(list(itertools.compress(row_1,header_index)))
            for file_line in check_handle:
                csv_writer.writerow(list(itertools.compress(file_line,header_index)))
        return(subset_file_name)
Example #14
def josephus(prisoner, kill, surviver):
    p = range(prisoner)
    k = [0] * kill
    k[kill-1] = 1
    s = [1] * kill
    s[kill -1] = 0
    queue = p

    queue = compress(queue, cycle(s))
    try:
        while True:
            p.append(queue.next())
    except StopIteration:
        pass

    kil=[]
    killed = compress(p, cycle(k))
    try:
        while True:
            kil.append(killed.next())
    except StopIteration:
        pass

    print 'The surviver is: ', kil[-surviver:]
    print 'The kill sequence is ', kil[:prisoner-surviver]
Example #15
def p_call_mol_energy(kind):
    if __class__._lock:
        return None
    else:
        __class__._lock = True
        tmp = [0] * len(__class__.container)
        for i in __class__.to_compute:
            tmp[i] = 1
        my_pool = mproc.Pool()
        if kind == 'full':
            output = [my_pool.apply_async(mol.full_energy_calc)
                      for mol in itertools.compress(__class__.container,
                                                    tmp)]
        elif kind == 'func':
            output = [my_pool.apply_async(mol.func_energy_calc)
                      for mol in itertools.compress(__class__.container,
                                                    tmp)]
        else:
            msg = 'Critical error in implementation'
            lg.critical(msg)
            raise RuntimeError(msg)
        new_mols = [p.get() for p in output]
        __class__.refresh_container(new_mols)
        # for mol in new_mols:
        #     print(mol.full_energy, mol.myprm_full.sprms)
        my_pool.terminate()
        __class__._lock = False
Example #16
def _euclidean(event_a, event_b):
    """Return the euclidean distance between two points

    Parameters:
    ----------
    event_a, event_b: Event
        Event point

    Output:
    ------
    result: Distance()
    """

    ##some shortcuts
    values_a = event_a.__dict__.values()
    values_b = event_b.__dict__.values()

    ##check if attributes are float to compute euclidean distance
    which_a_attribute_float = is_type(values_a, np.float)
    which_b_attribute_float = is_type(values_b, np.float)
    if which_a_attribute_float == which_b_attribute_float:
        x = np.array(list(compress(values_a, which_a_attribute_float)))
        y = np.array(list(compress(values_b, which_b_attribute_float)))

        return np.linalg.norm(x-y)
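A self-contained sketch of the attribute filtering above; `Event` and `is_type` are external to the snippet, so plain lists and `isinstance` stand in for them here:

import numpy as np
from itertools import compress

values_a = [1.0, 'label', 3.0]
values_b = [4.0, 'other', 7.0]
mask = [isinstance(v, float) for v in values_a]   # keep only the float attributes
x = np.array(list(compress(values_a, mask)))
y = np.array(list(compress(values_b, mask)))
print(np.linalg.norm(x - y))   # 5.0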
Example #17
def main():
    letters = ['a', 'b', 'c', 'd']
    booleans = [True, False, True, True]

    print 'letters is: {}'.format(letters)
    print 'booleans is: {}'.format(booleans)

    compressed = compress(letters, booleans)

    print 'letters and booleans compressed is: {}'.format(list(compressed))

    # what happens if the lists aren't the same length?
    # Answer: compress stops as soon as either iterable is exhausted,
    # so the output is truncated to the shorter of the two lists.
    letters = ['a', 'b', 'c', 'd', 'e', 'f']
    booleans = [True, False, True, True]
    print 'letters is: {}'.format(letters)
    print 'booleans is: {}'.format(booleans)

    compressed = compress(letters, booleans)

    print 'letters and booleans compressed is: {}'.format(list(compressed))

    letters = ['a', 'b', 'c', 'd']
    booleans = [True, False, True, True, True, True]

    print 'letters is: {}'.format(letters)
    print 'booleans is: {}'.format(booleans)

    compressed = compress(letters, booleans)

    print 'letters and booleans compressed is: {}'.format(list(compressed))
Example #18
def interpolate_all_stops(merged_row,tol=100):
    distance_stops = merged_row.shape_stop_dist
    # if the returned object is not a list, it does not contain any information, so there is nothing to interpolate.
    if type(distance_stops) != list:
        return [[]]
    # if there are fewer than 2 pings, no interpolation is possible
    if len(merged_row.recorded_time)<2:
        return [[]]    
    # assemble the ping data as a pandas Series, for convenient use of dropna() method
    list1, list2 = zip(*sorted(zip(merged_row.recorded_time,merged_row.veh_dist_along_shape)))
    veh_pings = pd.Series(index=list1,data=list2)
    veh_pings = veh_pings.dropna()
    # pings must be cleaned for cases when the vehicle "moves backwards" along the route.
    # this may occur when the vehicle is actually finishing another trip, or returning to the first stop
    # the proposed method is to identify the largest monotonically increasing subsequence
    first, last = longest_inc_range(veh_pings.values,tolerance=tol)
    if len(veh_pings) == 0:
        return [[]]
    valid_pings = veh_pings.iloc[first:last]
    if len(valid_pings)<2:
        return [[]]
    # finally, perform the interpolation with the cleaned data
    x = valid_pings.values
    y = valid_pings.index.values
    f = interpolate.interp1d(x,y)
    xnew = distance_stops
    masker = (xnew > min(x)) & (xnew < max(x))
    xnew = list(compress(xnew,masker))
    # return the estimated times (as timedelta dtype) and the stop labels
    interp_times = pd.to_timedelta(f(xnew),unit='ns')
    return [list(compress(merged_row.shape_stop_id,masker)),interp_times]
Example #19
	def filter_homography_corners(self):
		"""Filter invalid corner based on homography test"""
		logger.info("Filter Corners: Homography")
		optical_flow = self.corners_optical_flow
		corners = self.corners
		num_of_corners = len(corners)
		num_of_cameras = len(optical_flow[0])
		logger.info("Starting Corners Number:" + str(num_of_corners))
		reference_corners = np.float32(corners).reshape(-1, 1, 2)
		
		# initialise a cumulative mask to indicate the validity of corners
		cumulative_mask = np.zeros((num_of_corners, 1))
		# examine the flow of each corner, see if it passes homography test in each image.
		for i in xrange(0, num_of_cameras):
			current_corners = np.float32([pt[i] for pt in optical_flow]).reshape(-1, 1, 2)
			M, mask = cv2.findHomography(reference_corners, current_corners, cv2.RANSAC, 5.0)
			cumulative_mask = cumulative_mask + mask
		cumulative_mask = cumulative_mask.flatten().tolist()
		# defines how many passes are required in a valid flow
		min_pass = num_of_cameras * (1.0 - self.homography_params['ransacThreshold'])
		for j in xrange(0, num_of_corners):
			if (cumulative_mask[j] < min_pass):
				cumulative_mask[j] = False
			else:
				cumulative_mask[j] = True
		# remove invalid corners and their optical flow (compress is lazy, so the
		# results must be materialised and assigned back)
		optical_flow = list(itertools.compress(optical_flow, cumulative_mask))
		corners = list(itertools.compress(corners, cumulative_mask))
		logger.info("Result Corner Number: " + str(len(corners)))
		# update corner list and optical flow
		self.corners_optical_flow = optical_flow
		self.corners = corners
		logger.info("Finished Homography filtering, updating Corners and Optical Flow")
Example #20
	def get_pred_series(self, pred):
		loci = self.get_loci()

		zero_loci = list(compress(loci, np.logical_not(self.feats.get_nonzero())))
		s = pd.Series([self.label_obj.int_to_str_dict[i] for i in pred], index=compress(loci, self.feats.get_nonzero()))
		if len(zero_loci) > 0:
			s = s.append(pd.Series(['no_data',]*len(zero_loci), index=zero_loci))
		return s
Example #21
def n_fold_cross_validation(data, n, true_oracle, random_seed=None):
    if random_seed is not None:
        data = list(data)
        random_function = random.Random(random_seed).random
        random.shuffle(data, random_function)
    train_test_bit_maps = KFold(len(data), n)
    return ((list(compress(data, train)), list(compress(data, test)))  
           for train, test in train_test_bit_maps)
Example #22
def plot_pe(sam_fpath):
  p = SAM.SAMParser()
  reads = list(p.parse(sam_fpath))
  pes = []
  for r1,r2 in izip(reads[::2],reads[1::2]):
    if swap(r1.pos, r2.pos):
      r1,r2 = r2,r1
    pes.append(PE(r1,r2))
  
  coords = [pe.get_coord() for pe in pes]
  x,y = zip(*coords)
  minx,maxx=min(x),max(x)
  miny,maxy=min(y),max(y)
  contour_plot(minx,maxx,miny,maxy,coords)

  n_r,n_f = len(Reverse), len(Forward)
  plt.vlines(Reverse, ymin=miny,ymax=maxy, colors='r', alpha=0.1)
  plt.hlines(Reverse, xmin=miny,xmax=maxy, colors='r', alpha=0.1)
  plt.vlines(Forward, ymin=minx,ymax=maxx, colors='b', alpha=0.1)
  plt.hlines(Forward, xmin=minx,xmax=maxx, colors='b', alpha=0.1)

  read_length = 300
  xorients,yorients = zip(*[pe.get_orient() for pe in pes])
  xorients,yorients = na.array(xorients,dtype=bool), na.array(yorients,dtype=bool)

  points = na.array(coords)
  direct_x = points.copy()
  direct_x[:,0] += read_length
  paths_x = [Path(na.array([i,j])) for i,j in izip(points,direct_x)]


  direct_y = points.copy()
  direct_y[:,1] += read_length
  paths_y = [Path(na.array([i,j])) for i,j in izip(points,direct_y)]
  
  rev_c = PathCollection(list(chain(compress(paths_x,xorients),compress(paths_y,yorients))),
                         edgecolors='r',linewidths=2)
  for_c = PathCollection(list(chain(compress(paths_x,na.logical_not(xorients)),compress(paths_y,na.logical_not(yorients)))),edgecolors='b',linewidths=2)

  ax = plt.gca()
  ax.add_collection(rev_c)
  ax.add_collection(for_c)
  ax.set_xlabel('met')
  ax.set_ylabel('ptprz1')

  """
  x,y = zip(*[pe.get_coord() for pe in compress(pes, xorients)])
  plt.scatter(x,y,marker=TICKLEFT,c='r')
  x,y = zip(*[pe.get_coord() for pe in compress(pes, na.logical_not(xorients))])
  plt.scatter(x,y,marker=TICKRIGHT,c='b')

  x,y = zip(*[pe.get_coord() for pe in compress(pes, yorients)])
  plt.scatter(x,y,marker=TICKDOWN,c='r')
  x,y = zip(*[pe.get_coord() for pe in compress(pes, na.logical_not(yorients))])
  plt.scatter(x,y,marker=TICKUP,c='b')
  """

  plt.show()
Example #23
def calc_fit(model, metric, train_x, train_y, test_x, test_y, p):
    train_x = map(lambda x: list(compress(x, p)), train_x)
    test_x = map(lambda x: list(compress(x, p)), test_x)
    clf = model.fit(train_x, train_y)
    predictions = clf.predict(test_x)
    if metric == 'precision': return precision_score(test_y, predictions, [0, 1])
    elif metric == 'recall': return recall_score(test_y, predictions, [0, 1])
    elif metric == 'accuracy': return accuracy_score(test_y, predictions, [0, 1])
    return precision_score(test_y, predictions, [0, 1]) + recall_score(test_y, predictions, [0, 1]) + accuracy_score(test_y, predictions, [0, 1])
Example #24
def find_sundays(days_of_week, month, year):
	if len(month) != 28 or year % 4 > 0:
		return list((s for s in compress(month, days_of_week)))
	else:
		leap_february = []
		for i in month:
			leap_february.append(i)
		leap_february.append(29)
		return list((s for s in compress(leap_february, days_of_week)))
Example #25
    def _select_date_range(self, lines):
        """Identify lines containing headers within the range begin_date to end_date.

        Parameters
        -----
        lines: list
            list of lines from the IGRA2 data file.

        """
        headers = []
        num_lev = []
        dates = []

        # Get indices of headers, and make a list of dates and num_lev
        for idx, line in enumerate(lines):
            if line[0] == '#':
                year, month, day, hour = map(int, line[13:26].split())

                # All soundings have YMD, most have hour
                try:
                    date = datetime.datetime(year, month, day, hour)
                except ValueError:
                    date = datetime.datetime(year, month, day)

                # Check date
                if self.begin_date <= date <= self.end_date:
                    headers.append(idx)
                    num_lev.append(int(line[32:36]))
                    dates.append(date)
                if date > self.end_date:
                    break

        if len(dates) == 0:
            # Break if no matched dates.
            # Could improve this later by showing the date range for the station.
            raise ValueError('No dates match selection.')

        # Compress body of data into a string
        begin_idx = min(headers)
        end_idx = max(headers) + num_lev[-1]

        # Make a boolean vector that selects only list indices within the time range
        selector = np.zeros(len(lines), dtype=bool)
        selector[begin_idx:end_idx + 1] = True
        selector[headers] = False
        body = ''.join([line for line in itertools.compress(lines, selector)])

        selector[begin_idx:end_idx + 1] = ~selector[begin_idx:end_idx + 1]
        header = ''.join([line for line in itertools.compress(lines, selector)])

        # expand date vector to match length of the body dataframe.
        dates_long = np.repeat(dates, num_lev)

        return body, header, dates_long, dates
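A minimal illustration of the selector trick used above, with a made-up six-line "file": the in-range slice is marked True, header positions are flipped to False to extract the body, and the slice is then inverted to extract the headers:

import itertools
import numpy as np

lines = ['#header 1', 'level a', 'level b', '#header 2', 'level c', 'level d']
headers = [0, 3]
selector = np.zeros(len(lines), dtype=bool)
selector[0:len(lines)] = True
selector[headers] = False
print(list(itertools.compress(lines, selector)))   # body lines
selector[0:len(lines)] = ~selector[0:len(lines)]
print(list(itertools.compress(lines, selector)))   # header lines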
Example #26
	def get_pred_dataframe(self, pred):
		loci = self.get_loci()
		zero_loci = list(compress(loci, np.logical_not(self.feats.get_nonzero())))

		df = pd.DataFrame(pred, index=compress(loci,self.feats.get_nonzero())).rename(columns=self.label_obj.int_to_str_dict)

		nan_df = pd.DataFrame([[-np.inf]*len(df.columns),]*len(zero_loci), index=zero_loci).rename(columns=self.label_obj.int_to_str_dict)
		df['best'] = df.apply(lambda s:s.argmax(), axis=1)
		nan_df['best'] = 'no_data'

		df = pd.concat((df,nan_df))
		return df
Example #27
def verify(taken, objective, values, weight, weights):
    """
    """
    import itertools
    
    taken_value_sum  = sum(itertools.compress(values, taken))
    taken_weight_sum = sum(itertools.compress(weights, taken))
    
    if objective != taken_value_sum:
        print 'objective {} does not match taken_value_sum: {}, taken: {}'.format(objective, taken_value_sum, taken)
    
    if weight != taken_weight_sum:
        print 'weight {} does not match taken_weight_sum: {}, taken: {}'.format(weight, taken_weight_sum, taken)
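A hypothetical call with made-up knapsack data, just to show what the two compress sums are checked against (nothing is printed when the solution is consistent):

values  = [60, 100, 120]   # made-up item values
weights = [10, 20, 30]     # made-up item weights
taken   = [1, 0, 1]        # selection mask: items 0 and 2 are taken
verify(taken, 180, values, 40, weights)   # 60 + 120 = 180 and 10 + 30 = 40, so no complaint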
Example #28
    def evaluate_guess(self, guess):
        assert len(guess) == len(self.secret)
        correct_num = sum((map((lambda x, y: x == y), self.secret, guess)))
        incorrect_filter = list(map((lambda x, y: x != y), self.secret, guess))
        remaining_secret = list(compress(self.secret, incorrect_filter))
        incorrect_guesses = list(compress(guess, incorrect_filter))

        incorrect_guesses_num = 0
        for g in incorrect_guesses:
            if g in remaining_secret:
                remaining_secret.remove(g)
                incorrect_guesses_num += 1

        return correct_num, incorrect_guesses_num
Example #29
    def compute(cls, keys, variables, function, parameters, ipython_profile=None,
                group_name=None):
        """
        Compute profiles by applying the parameters to the function in parallel.

        """
        assert len(keys) == len(parameters)
        njobs = len(parameters)
        if isinstance(ipython_profile, LSFView):
            view = ipython_profile
            logger.debug('Running %d jobs on LSF' % view.njobs)
            generator = view.imap(function, parameters)
        elif ipython_profile:
            from IPython.parallel import Client, LoadBalancedView
            client = Client(profile=ipython_profile)
            view = client.load_balanced_view()
            logger.debug('Running %d jobs' % njobs)
            generator = view.imap(function, parameters)
        elif ipython_profile == False:
            generator = (function(p) for p in parameters)
        else:
            from multiprocessing import Pool, cpu_count
            import threading
            view = Pool()
            logger.debug('Running %d jobs on %d local CPU%s' % (njobs, cpu_count(), ' s'[cpu_count() > 1]))
            generator = view.imap(function, parameters)
        try:
            import progressbar
            progress = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ',
                                                        progressbar.Bar(), ' ', 
                                                        progressbar.Counter(), '/', 
                                                        str(njobs), ' ',
                                                        progressbar.ETA()],
                                               maxval=njobs)
            data = list(progress(generator))
        except ImportError:
            data = list(generator)

        for i, (p, r) in enumerate(zip(parameters, data)):
            if r is None:
                logger.info('Retrying failed computation locally')
                data[i] = function(p)

        rowmask = [(l != None) and all(~np.isnan(l)) for l in data]
        import itertools
        data = list(itertools.compress(data, rowmask))
        keys = list(itertools.compress(keys, rowmask))

        return cls(keys, data, variables, group_name=group_name)
Example #30
def findsilence(y,sr,ind_i):
    hop = int(round(sr*0.2)) #hop and width defines search window
    width = sr*0.2
    n_slice = int(len(y)/hop)
    starts = np.arange(n_slice)*hop
    ends = starts+width
    if hop != width:
        cutoff = np.argmax(ends>len(y))
        starts = starts[:cutoff]
        ends = ends[:cutoff]
        n_slice = len(starts)
    mask = map(lambda i: np.dot(y[starts[i]:ends[i]],y[starts[i]:ends[i]])/width, range(n_slice)) < 0.04 * np.dot(y,y)/len(y)
    starts =  list(compress(starts+ind_i,mask))
    ends = list(compress(ends+ind_i,mask))
    return zip(starts,ends)
Example #31
def vowel(c):
    return c.lower() in 'aeiou'


print("list(filter(vowel, 'Aardvark')) {}\n----------".format(
    list(filter(vowel, 'Aardvark'))))
print("list(itertools.filterfalse(vowel, 'Aardvark')) {}\n----------".format(
    list(itertools.filterfalse(vowel, 'Aardvark'))))
print("list(itertools.dropwhile(vowel, 'Aardvark')) {}\n----------".format(
    list(itertools.dropwhile(vowel, 'Aardvark'))))
print("list(itertools.takewhile(vowel, 'Aardvark')) {}\n----------".format(
    list(itertools.takewhile(vowel, 'Aardvark'))))
print(
    "list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1))) {}\n----------".
    format(list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1)))))
print("list(itertools.islice('Aardvark', 4)) {}\n----------".format(
    list(itertools.islice('Aardvark', 4))))
print("list(itertools.islice('Aardvark', 4, 7)) {}\n----------".format(
    list(itertools.islice('Aardvark', 4, 7))))
print("list(itertools.islice('Aardvark', 1, 7, 2)) {}\n----------".format(
    list(itertools.islice('Aardvark', 1, 7, 2))))

sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
print("list(itertools.accumulate(sample)) {}\n----------".format(
    list(itertools.accumulate(sample))))
print("list(itertools.accumulate(sample, min)) {}\n----------".format(
    list(itertools.accumulate(sample, min))))
print("list(itertools.accumulate(sample, max)) {}\n----------".format(
    list(itertools.accumulate(sample, max))))
import operator
Example #32
def get_old_filenames(files, matches):
    return list(compress(files, matches))
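A one-line hypothetical call, since the wrapper above is just compress plus list materialisation:

print(get_old_filenames(['a.txt', 'b.txt', 'c.txt'], [True, False, True]))   # ['a.txt', 'c.txt']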
Example #33
    def do_export(self,
                  settings,
                  selected_features,
                  progress_slot,
                  lane_index,
                  filename_suffix=""):
        """
        Implements ExportOperator.do_export(settings, selected_features, progress_slot
        Most likely called from ExportOperator.export_object_data
        :param settings: the settings for the exporter, see
        :param selected_features:
        :param progress_slot:
        :param lane_index: Ignored. (This is a single-lane operator. It is the caller's responsibility to make sure he's calling the right lane.)
        :param filename_suffix: If provided, appended to the filename (before the extension).
        :return:
        """

        obj_count = list(objects_per_frame(self.LabelImage))  # slow
        divisions = self.divisions
        t_range = (0, self.LabelImage.meta.shape[
            self.LabelImage.meta.axistags.index("t")])
        oid2tid, _ = self._getObjects(t_range, None)  # slow
        max_tracks = max(
            max(map(len, i.values())) if map(len, i.values()) else 0
            for i in oid2tid.values())
        ids = ilastik_ids(obj_count)

        file_path = settings["file path"]
        if filename_suffix:
            path, ext = os.path.splitext(file_path)
            file_path = path + "-" + filename_suffix + ext

        export_file = ExportFile(file_path)
        export_file.ExportProgress.subscribe(progress_slot)
        export_file.InsertionProgress.subscribe(progress_slot)

        export_file.add_columns("table", range(sum(obj_count)), Mode.List,
                                Default.KnimeId)
        export_file.add_columns("table", list(ids), Mode.List,
                                Default.IlastikId)
        export_file.add_columns(
            "table", oid2tid, Mode.IlastikTrackingTable, {
                "max": max_tracks,
                "counts": obj_count,
                "extra ids": {},
                "range": t_range
            })
        export_file.add_columns("table", self.ObjectFeatures,
                                Mode.IlastikFeatureTable,
                                {"selection": selected_features})

        if divisions:
            ott = partial(self.lookup_oid_for_tid, oid2tid)
            divs = [(value[1], ott(key, value[1]), key,
                     ott(value[0][0], value[1] + 1), value[0][0],
                     ott(value[0][1], value[1] + 1), value[0][1])
                    for key, value in sorted(divisions.iteritems(),
                                             key=itemgetter(0))]
            assert sum(Default.ManualDivMap) == len(divs[0])
            names = list(
                compress(Default.DivisionNames["names"], Default.ManualDivMap))
            export_file.add_columns("divisions",
                                    divs,
                                    Mode.List,
                                    extra={"names": names})

        if settings["file type"] == "h5":
            export_file.add_rois(Default.LabelRoiPath, self.LabelImage,
                                 "table", settings["margin"], "labeling")
            if settings["include raw"]:
                export_file.add_image(Default.RawPath, self.RawImage)
            else:
                export_file.add_rois(Default.RawRoiPath, self.RawImage,
                                     "table", settings["margin"])
        export_file.write_all(settings["file type"], settings["compression"])

        export_file.ExportProgress.unsubscribe(progress_slot)
        export_file.InsertionProgress.unsubscribe(progress_slot)
Example #34

def is_int(val):
    try:
        x = int(val)
        return True
    except ValueError:
        return False


ivals = list(filter(is_int, values))
print(ivals)

#####
from itertools import compress

addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
more5 = [n > 5 for n in counts]
print(more5)
print(list(compress(addresses, more5)))
Example #35
import itertools
gen = itertools.count(1, .5)
next(gen)
gen = itertools.takewhile(lambda n: n<3, itertools.count(1, .5))
list(gen)

def aritprog_gen(begin, step, end=None):
    first = type(begin + step)(begin)
    ap_gen = itertools.count(first, step)
    if end is not None:
        ap_gen = itertools.takewhile(lambda n: n<end, ap_gen)
    return ap_gen

# filtering
itertools.compress(it, selector_it)
itertools.dropwhile(predicate, it)
filter(predicate, it)
itertools.filterfalse(predicate, it)
itertools.islice(it, stop)
itertools.islice(it, start, stop, step=1)
itertools.takewhile(predicate, it)

# mapping
itertools.accumulate(it, [func])
enumerate(it, start=0)
map(func, it1, [it2, ..., itN])
itertools.starmap(func, it)

# merging
itertools.chain(it1, ..., itN)
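A short demonstration of a few of the filtering generators listed above, using only the standard library:

import itertools

data = [0, 1, 2, 3, 4, 5]
print(list(itertools.compress(data, [1, 0, 1, 0, 1, 0])))    # [0, 2, 4]
print(list(itertools.dropwhile(lambda n: n < 3, data)))      # [3, 4, 5]
print(list(itertools.filterfalse(lambda n: n % 2, data)))    # [0, 2, 4]
print(list(itertools.islice(data, 1, 5, 2)))                 # [1, 3]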
Example #36
def test_int_bool_envvars_have_ensurers():
    bool_ints = [type(envvar) in [bool, int] for envvar in DEFAULT_VALUES.values()]
    key_mask = set(itertools.compress(DEFAULT_VALUES.keys(), bool_ints))
    ensurer_keys = set(DEFAULT_ENSURERS.keys())
    assert len(key_mask.intersection(ensurer_keys)) == len(key_mask)
Example #37
# itertools_compress.py

from itertools import compress, cycle

every_third = cycle([False, False, True])
data = range(1, 10)

for i in compress(data, every_third):
    print(i, end=' ')
print()
Example #38
def combinations_count(n, r):
    r = min(r, n - r)
    numer = reduce(mul, range(n, n - r, -1), 1)
    denom = reduce(mul, range(1, r + 1), 1)
    return numer // denom


c = combinations_count(n, 2)

for i in range(2**c):
    s = -1

    edges_flag = [False] * c

    for j in range(c):
        if (i >> j) & 1:
            edges_flag[j] = True

    edges_active = list(compress(edges, edges_flag))

    edges_nb = [0] * n

    # for each node, add up the numbers of its adjacent vertices
    for e in edges_active:
        edges_nb[e[0] - 1] += e[1]
        edges_nb[e[1] - 1] += e[0]

    if min(edges_nb) == max(edges_nb) != 0:
        print(f"S={min(edges_nb)}")
        print(*edges_active, sep="\n")
Example #39
    def fit(self, X, y):
        """Build an ensemble of BOSS classifiers from the training set (X,
        y), either through randomising over the para
         space to make a fixed size ensemble quickly or by creating a
         variable size ensemble of those within a threshold
         of the best
        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """

        X, y = check_X_y(X, y, enforce_univariate=True)
        y = y.values if isinstance(y, pd.Series) else y

        self.time_limit = self.time_limit * 60
        self.n_instances, self.series_length = X.shape[0], len(X.iloc[0, 0])
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self.classifiers = []
        self.weights = []

        # Window length parameter space dependent on series length

        max_window_searches = self.series_length / 4
        max_window = int(self.series_length * self.max_win_len_prop)
        win_inc = int((max_window - self.min_window) / max_window_searches)
        if win_inc < 1:
            win_inc = 1

        # cBOSS
        if self.randomised_ensemble:
            possible_parameters = self._unique_parameters(max_window, win_inc)
            num_classifiers = 0
            start_time = time.time()
            train_time = 0
            subsample_size = int(self.n_instances * 0.7)
            lowest_acc = 0
            lowest_acc_idx = 0

            rng = check_random_state(self.random_state)

            if self.time_limit > 0:
                self.n_parameter_samples = 0

            while (train_time < self.time_limit or num_classifiers <
                   self.n_parameter_samples) and len(possible_parameters) > 0:
                parameters = possible_parameters.pop(
                    rng.randint(0, len(possible_parameters)))

                subsample = rng.choice(self.n_instances,
                                       size=subsample_size,
                                       replace=False)
                X_subsample = X.iloc[subsample, :]
                y_subsample = y[subsample]

                boss = BOSSIndividual(*parameters,
                                      alphabet_size=self.alphabet_size,
                                      save_words=False,
                                      random_state=self.random_state)
                boss.fit(X_subsample, y_subsample)
                boss._clean()

                boss.accuracy = self._individual_train_acc(
                    boss, y_subsample, subsample_size, lowest_acc)
                weight = math.pow(boss.accuracy, 4)

                if num_classifiers < self.max_ensemble_size:
                    if boss.accuracy < lowest_acc:
                        lowest_acc = boss.accuracy
                        lowest_acc_idx = num_classifiers
                    self.weights.append(weight)
                    self.classifiers.append(boss)

                elif boss.accuracy > lowest_acc:
                    self.weights[lowest_acc_idx] = weight
                    self.classifiers[lowest_acc_idx] = boss
                    lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

                num_classifiers += 1
                train_time = time.time() - start_time
        # BOSS
        else:
            max_acc = -1
            min_max_acc = -1

            for i, normalise in enumerate(self.norm_options):
                for win_size in range(self.min_window, max_window + 1,
                                      win_inc):
                    boss = BOSSIndividual(win_size,
                                          self.word_lengths[0],
                                          normalise,
                                          self.alphabet_size,
                                          save_words=True,
                                          random_state=self.random_state)
                    boss.fit(X, y)

                    best_classifier_for_win_size = boss
                    best_acc_for_win_size = -1

                    # the word length actually used may be shorter
                    best_word_len = boss.transformer.word_length

                    for n, word_len in enumerate(self.word_lengths):
                        if n > 0:
                            boss = boss._shorten_bags(word_len)

                        boss.accuracy = self._individual_train_acc(
                            boss, y, self.n_instances, best_acc_for_win_size)

                        # print(win_size, boss.accuracy)
                        if boss.accuracy >= best_acc_for_win_size:
                            best_acc_for_win_size = boss.accuracy
                            best_classifier_for_win_size = boss
                            best_word_len = word_len

                    if self._include_in_ensemble(best_acc_for_win_size,
                                                 max_acc, min_max_acc,
                                                 len(self.classifiers)):
                        best_classifier_for_win_size._clean()
                        best_classifier_for_win_size._set_word_len(
                            best_word_len)
                        self.classifiers.append(best_classifier_for_win_size)

                        # print("appending", best_acc_for_win_size, win_size)
                        if best_acc_for_win_size > max_acc:
                            max_acc = best_acc_for_win_size
                            self.classifiers = list(
                                compress(self.classifiers, [
                                    classifier.accuracy >=
                                    max_acc * self.threshold
                                    for c, classifier in enumerate(
                                        self.classifiers)
                                ]))

                        min_max_acc, min_acc_ind = \
                            self._worst_ensemble_acc()

                        if len(self.classifiers) > self.max_ensemble_size:
                            if min_acc_ind > -1:
                                del self.classifiers[min_acc_ind]
                                min_max_acc, min_acc_ind = \
                                    self._worst_ensemble_acc()

            self.weights = [1 for n in range(len(self.classifiers))]

        self.n_estimators = len(self.classifiers)
        self.weight_sum = np.sum(self.weights)

        self._is_fitted = True
        return self
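A toy illustration of the accuracy-threshold pruning applied to `self.classifiers` above, using hypothetical stand-in objects that only carry an `accuracy` attribute:

from itertools import compress
from types import SimpleNamespace

classifiers = [SimpleNamespace(accuracy=a) for a in (0.90, 0.60, 0.85)]
max_acc, threshold = 0.90, 0.92
keep = [c.accuracy >= max_acc * threshold for c in classifiers]
print([c.accuracy for c in compress(classifiers, keep)])   # [0.9, 0.85]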
Example #40
    start = i - input_length
    end = i

    for l1 in range(start, end - 1):
        for l2 in range(l1 + 1, end):
            if lines[l1] + lines[l2] == next_number:
                ok_numbers.append(lines[i])

all_numbers = lines[input_length:]

invalid_number = list(set(all_numbers) - set(ok_numbers))[0]
print(invalid_number)
# 177777905


# part 2
list_to_search = list(compress(
    lines, [l != invalid_number for l in lines]
))

def getFirstContiguousSet(list_to_search, target_number):
    for i in range(1, len(list_to_search) - 1):
        for j in range(i + 1, len(list_to_search)):
            contiguous_set = list_to_search[i:j]
            if sum(contiguous_set) == target_number:
                return(contiguous_set)

correct_contiguous_sets = getFirstContiguousSet(list_to_search, invalid_number)
print(min(correct_contiguous_sets) + max(correct_contiguous_sets))
# 23463012
Example #41
            u_var_diff = 0.
        else:
            try:

                u_var_diff = lidar_processing_vol_averaging(
                    u_rot, frequency, mode_ws, mode_vol)

            except:
                u_var_diff = 0.

        u_var_vol = u_var_noise + u_var_diff
        TI_ZephIR_vol_avg_all[i] = (np.sqrt(u_var_vol) / U) * 100

#Extract TI values and timestamps for all times when corrected TI value is valid
mask = ~np.isnan(TI_ZephIR_vol_avg_all)

timestamp_10min_all = list(compress(timestamp_10min_all, mask))
TI_ZephIR_orig_all = TI_ZephIR_orig_all[mask]
TI_ZephIR_vol_avg_all = TI_ZephIR_vol_avg_all[mask]

#Reduce number of decimal places in output TI data
TI_orig_temp = ["%0.2f" % i for i in TI_ZephIR_orig_all]
TI_corrected_temp = ["%0.2f" % i for i in TI_ZephIR_vol_avg_all]

#Write out timestamp, original lidar TI, and corrected lidar TI
with open(main_directory + 'L_TERRA_corrected_TI_ZephIR.csv', 'a') as fp:
    a = csv.writer(fp, delimiter=',')
    data = np.vstack([timestamp_10min_all, TI_orig_temp,
                      TI_corrected_temp]).transpose()
    a.writerows(data)
Example #42
from itertools import compress, product
import re

with open('14_input.txt') as f:
  program = [(re.match(r'(mem)\[(\d+)\] = (\d+)', l) or re.match(r'(mask) = ([01X]+)', l)).groups() for l in f]
  program = [(c[0], None, c[1]) if c[0] == 'mask' else (c[0], int(c[1]), int(c[2])) for c in program]

masks, mem_pt1, mem_pt2 = {}, {}, {}
for instr, addr, arg in program:
  if instr == 'mask':
    masks = {d: reduce(lambda a, b: a * 2 + (1 if b == d else 0), arg, 0) for d in ('0', '1')}
    masks.update({'X': [2**(35-idx) for idx, bit in enumerate(arg) if bit == 'X']})
  else:
    mem_pt1[addr] = (arg | masks['1']) & ~masks['0']
    for sel in product((0, 1), repeat=len(masks['X'])):
      on, off = (sum(compress(masks['X'], sel)) | masks['1'],
                 sum(compress(masks['X'], map(lambda s: 1-s, sel))))
      mem_pt2[(addr | on) & ~off] = arg
    
print 'part 1: %d' % sum(mem_pt1.values())
print 'part 2: %d' % sum(mem_pt2.values())
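A small illustration of the floating-bit enumeration in part 2 above: for each 0/1 selector tuple, compress picks the subset of 'X' bit weights that are switched on:

from itertools import compress, product

x_bits = [4, 1]   # hypothetical bit weights for two 'X' positions
for sel in product((0, 1), repeat=len(x_bits)):
    print(sel, sum(compress(x_bits, sel)))
# (0, 0) 0   (0, 1) 1   (1, 0) 4   (1, 1) 5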

Example #43
ivals = list(filter(is_int, values))
print(ivals)

import math

print([math.sqrt(n) for n in mylist if n > 0])
print()

clip_neg = [n if n > 0 else 0 for n in mylist]
print(clip_neg)
clip_pos = [n if n < 0 else 0 for n in mylist]
print(clip_pos)
print()

addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 N ADDISON',
    '4801 N BROADWAY',
    '1039 N GRANVILLE'
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
from itertools import compress
more5 = [n > 5 for n in counts]
print(more5)
re = list(compress(addresses, more5))
print(re)
Example #44
def OA_request(paralist, product='atl06'):
    """
    Request data from OpenAltimetry based on API
    Inputs:
        paralist: [trackId, Date, cycle, bbox]
            trackId: RGT number
            beamlist: list of beam number
            cycle: cycle number
            bbox: DEM bounding box
        product: ICESat-2 product
    Output:
        track_df: dataframe for all beams of one RGT
    """
    points = []  # store all beam data for one RGT
    trackId, Date, cycle, bbox = paralist[0], paralist[1], paralist[
        2], paralist[3]
    # iterate all six beams
    for beam in cn.beamlist:
        # Generate API
        payload = {
            'product': product,
            'endDate': Date,
            'minx': str(bbox[0]),
            'miny': str(bbox[1]),
            'maxx': str(bbox[2]),
            'maxy': str(bbox[3]),
            'trackId': trackId,
            'beamName': beam,
            'outputFormat': 'json'
        }

        # request OpenAltimetry
        r = requests.get(cn.base_url, params=payload)

        # get elevation data
        elevation_data = r.json()

        # length of file list
        file_len = len(elevation_data['data'])

        # indices of files whose acquisition date matches the requested date
        idx = [
            elevation_data['data'][i]['date'] == Date
            for i in np.arange(file_len)
        ]

        # get data we need
        beam_data = list(compress(elevation_data['data'], idx))

        if not beam_data:
            continue

        # elevation array
        beam_elev = beam_data[0]['beams'][0]['lat_lon_elev']

        if not beam_elev:  # check if no data
            continue  # continue to next beam

        for p in beam_elev:
            points.append({
                'lat': p[0],
                'lon': p[1],
                'h': p[2],
                'beam': beam,
                'cycle': cycle,
                'time': Date
            })

    track_df = pd.DataFrame.from_dict(points)

    return track_df
Example #45
        no.add(feature)
    elif option == "m":
        maybe.add(feature)
    elif option == "e":
        print("Exiting...")
        break
    else:
        print("Unrecognised option")
        continue

    # Disable the feature
    X[:, featureIdx] = 0
    remaining = selector.sum()

isSelected = (selector > 0).todense().flat
selectedJobs = compress(jobs, isSelected)

getTitleAndDesc = lambda job: job[jobTitleField] + " - " + job[
    jobDescritpionField][:100]
selectedJobs = map(getTitleAndDesc, selectedJobs)

print("[Remaining entries]")
print("\n".join(selectedJobs))

print()
print("[Preference of words]")
print("Yes: ", end="")
print(yes)
print("No: ", end="")
print(no)
print("Maybe: ", end="")
Example #46
    inner_block = []
    for item in fm[i].split("</gml:coordinates></gml:LinearRing></gml:innerBoundaryIs>"):
        if "<gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>" in item:
            inner_block.append(item [ item.find("<gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>")+
                                    len("<gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>") : ])
    if not inner_block:
        inner.append([])
        inner_count.append(0)
    else:
        inner.append([[[float(v6) for v6 in v5] for v5 in v4] for v4 in 
                 [[v3.split(',') for v3 in v2] for v2 in 
                  [v.split(' ') for v in inner_block]]])
        inner_count.append(len(inner[-1]))

dn1 = [v==1 for v in dn]
outer1 = list(compress(outer, dn1))
inner1 = list(compress(inner, dn1))
inner_count1 = list(compress(inner_count, dn1))


#%% Domain KML
print('Creating domain kml...')

c_domain = 'AB0000FF'
c_empty = '00000000'


s = []
s = """<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2">
<Document>
Example #47
    def block_election_status(self, block_id, voters):
        """Tally the votes on a block, and return the status: valid, invalid, or undecided."""

        votes = list(backend.query.get_votes_by_block_id(self.connection, block_id))
        n_voters = len(voters)

        voter_counts = collections.Counter([vote['node_pubkey'] for vote in votes])
        for node in voter_counts:
            if voter_counts[node] > 1:
                raise exceptions.MultipleVotesError(
                    'Block {block_id} has multiple votes ({n_votes}) from voting node {node_id}'
                    .format(block_id=block_id, n_votes=str(voter_counts[node]), node_id=node))

        if len(votes) > n_voters:
            raise exceptions.MultipleVotesError('Block {block_id} has {n_votes} votes cast, but only {n_voters} voters'
                                                .format(block_id=block_id, n_votes=str(len(votes)),
                                                        n_voters=str(n_voters)))

        # vote_cast is the list of votes e.g. [True, True, False]
        vote_cast = [vote['vote']['is_block_valid'] for vote in votes]
        # prev_block are the ids of the nominal prev blocks e.g.
        # ['block1_id', 'block1_id', 'block2_id']
        prev_block = [vote['vote']['previous_block'] for vote in votes]
        # vote_validity checks whether a vote is valid
        # or invalid, e.g. [False, True, True]
        vote_validity = [self.consensus.verify_vote(voters, vote) for vote in votes]

        # element-wise product of stated vote and validity of vote
        # vote_cast = [True, True, False] and
        # vote_validity = [False, True, True] gives
        # [True, False]
        # Only the correctly signed votes are tallied.
        vote_list = list(compress(vote_cast, vote_validity))

        # Total the votes. Here, valid and invalid refer
        # to the vote cast, not whether the vote itself
        # is valid or invalid.
        n_valid_votes = sum(vote_list)
        n_invalid_votes = len(vote_cast) - n_valid_votes

        # The use of ceiling and floor is to account for the case of an
        # even number of voters where half the voters have voted 'invalid'
        # and half 'valid'. In this case, the block should be marked invalid
        # to avoid a tie. In the case of an odd number of voters this is not
        # relevant, since one side must be a majority. (A short worked check of
        # these thresholds follows this method.)
        if n_invalid_votes >= math.ceil(n_voters / 2):
            return Bigchain.BLOCK_INVALID
        elif n_valid_votes > math.floor(n_voters / 2):
            # The block could be valid, but we still need to check if votes
            # agree on the previous block.
            #
            # First, only consider blocks with legitimate votes
            prev_block_list = list(compress(prev_block, vote_validity))
            # Next, only consider the blocks with 'yes' votes
            prev_block_valid_list = list(compress(prev_block_list, vote_list))
            counts = collections.Counter(prev_block_valid_list)
            # Make sure the majority vote agrees on previous node.
            # The majority vote must be the most common, by definition.
            # If it's not, there is no majority agreement on the previous
            # block.
            if counts.most_common()[0][1] > math.floor(n_voters / 2):
                return Bigchain.BLOCK_VALID
            else:
                return Bigchain.BLOCK_INVALID
        else:
            return Bigchain.BLOCK_UNDECIDED
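The ceiling/floor tie-breaking described in the comments above is easiest to verify with concrete numbers. The following is a minimal, illustrative sketch of just the threshold arithmetic; the function and constant names here are chosen for the example and are not part of the BigchainDB API, and the previous-block agreement check is deliberately omitted.

import math

def tally(n_voters, n_valid_votes, n_invalid_votes):
    # Mirrors the decision structure of block_election_status above.
    if n_invalid_votes >= math.ceil(n_voters / 2):
        return 'BLOCK_INVALID'
    elif n_valid_votes > math.floor(n_voters / 2):
        return 'BLOCK_VALID'
    return 'BLOCK_UNDECIDED'

assert tally(4, 2, 2) == 'BLOCK_INVALID'    # even split: marked invalid to avoid a tie
assert tally(4, 3, 1) == 'BLOCK_VALID'      # strict majority of 'valid' votes
assert tally(5, 2, 2) == 'BLOCK_UNDECIDED'  # one vote missing or unverified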
Example #48
0
def _compress_iterable(iterable, selectors):
    return compress(iterable, selectors)
Example #49
0
        corr_radio = radio - fitcorr(temp)
        corrrms = np.std(corr_radio)
        a_corrrms = np.append(a_corrrms, corrrms)
        if corrrms < limrms:
            fil.append(True)
            a_radio = np.append(a_radio, radio)
            a_temp = np.append(a_temp, temp)
        else:
            fil.append(False)
    if np.max(a_corrrms) < finallimrms:
        print('np.max(a_corrrms) = ', np.max(a_corrrms))
        isgood = True
    else:
        limrms = np.max(a_corrrms)

    selected = list(compress(selected, fil))
    fit = np.polyfit(a_temp, a_radio, 1)
    fitcorr = np.poly1d(fit)

figcorrrms = plt.figure()
figcorrrms.suptitle(stname)
plt.hist(a_corrrms, bins=50)
plt.xlabel('rms (after corr) [ADC]')
if save:
    outnamecorrrms = utils.outname(
        constant.plotfolder + '/' + stname + '_corrrms_' + extfile, '.png')
    plt.savefig(outnamecorrrms + '.png')

figlastcorrel = plt.figure()
figlastcorrel.suptitle(stname)
plt.plot(a_temp, a_radio, '.')
Example #50
0
for file in sorted(os.listdir(cwd)):
    if file.endswith(".p"):
        df_temp = pickle.load(open(cwd + '/' + file, "rb"))
        df = pd.concat([df, df_temp])

        del df_temp
        gc.collect()

        FileCounter = FileCounter + 1
        print(str(FileCounter) + ' file(s) loaded!')
# ========================================================================
#Fix MIC feature issue
if np.sum(df.columns == 'MIC') > 1:
    LogicalMICpos = df.columns == 'MIC'
    from itertools import compress
    MICpos = list(compress(range(len(LogicalMICpos)), LogicalMICpos))
    if len(MICpos) > 1:
        WrongPos = MICpos[-1]
        FixedColumn = pd.DataFrame(index=df.index, columns=['MICf'])
        FixedColumn['MICf'] = df.iloc[:, WrongPos]
        df = pd.concat(
            [df.iloc[:, :WrongPos], FixedColumn, df.iloc[:, WrongPos + 1:]],
            axis=1)

# All indices:
INDonMETA = np.arange(df.shape[0])

# training

RAWdfFEATURES = df.columns.values[2:]
pickle.dump(RAWdfFEATURES, open(OUTPUTFOLDER + '/Features.p', 'wb'))
Example #51
0
from warnings import warn

import numpy as np
from itertools import compress  # noqa
import matplotlib
from matplotlib.colors import is_color_like
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import sphinx_gallery.backreferences

from local_module import N  # N = 1000

t = np.arange(N) / float(N)
win = np.hanning(N)
print(is_color_like('r'))
fig, ax = plt.subplots()
ax.plot(t, win, color='r')
ax.text(0, 1, 'png', size=40, va='top')
fig.tight_layout()
orig_dpi = 80. if matplotlib.__version__[0] < '2' else 100.
assert plt.rcParams['figure.dpi'] == orig_dpi
plt.rcParams['figure.dpi'] = 70.
assert plt.rcParams['figure.dpi'] == 70.
listy = [0, 1]
compress('abc', [0, 0, 1])
warn('This warning should show up in the output', RuntimeWarning)
x = Figure()  # plt.Figure should be decorated (class), x shouldn't (inst)
# nested resolution resolves to numpy.random.mtrand.RandomState:
rng = np.random.RandomState(0)
# test Issue 583
sphinx_gallery.backreferences.identify_names([('text', 'Text block', 1)])
# 583: methods don't link properly
dc = sphinx_gallery.backreferences.DummyClass()
dc.run()
print(dc.prop)
Example #52
0
def count_entities(document):
    """Counts frequency of entities
    Parameters
    ----------
    document : string
        String containing document text

    Returns
    -------
    token_count_df : pandas.Dataframe
        Dataframe containing columns ['Tokens', 'Types', 'Counts']
    """

    matcher = Matcher(nlp.vocab, validate=True)
    # Define patterns
    organisation = [{
        "ENT_TYPE": "ORG",
        "POS": "PROPN"
    }, {
        "ENT_TYPE": "ORG",
        "OP": "*"
    }]
    person = [{
        "ENT_TYPE": "PERSON",
        "POS": "PROPN"
    }, {
        "ENT_TYPE": "PERSON",
        "OP": "+"
    }]
    # Add match patterns
    matcher.add("Organisation", None, organsiation)
    matcher.add("Person", None, person)
    # Analyse document
    doc = nlp(document)
    matches = matcher(doc)
    # Extract matches
    types = []
    start_index = []
    end_index = []
    substrings = []
    for match_id, start, end in matches:
        string_id = nlp.vocab.strings[match_id]  # Get string representation
        span = doc[start:end]  # The matched span
        types.append(string_id)
        start_index.append(start)
        end_index.append(end)
        substrings.append(span.text)
    # Apply "greedy" logic
    s = np.array(start_index)
    e = np.array(end_index)
    ok = s > 0
    for i, (start, end) in enumerate(zip(s, e)):
        # Is token part of a larger span elsewhere in the list?
        is_substr = np.logical_or(np.logical_and(s > start, e <= end),
                                  np.logical_and(s >= start, e < end))
        # Remove substrings
        ok = np.logical_and(ok, ~is_substr)
    types = list(compress(types, ok))
    substrings = list(compress(substrings, ok))
    # Generate dataframe
    entities_df = pd.DataFrame({'Types': types, 'Tokens': substrings})
    entities_df = entities_df.set_index('Tokens')
    entities_df = entities_df.reset_index(drop=True)
    counts_df = count_tokens(substrings)
    return pd.concat([counts_df, entities_df], axis=1, join='inner')
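As flagged in the "greedy" comment above, the span pruning can be illustrated on its own. Below is a small standalone sketch with assumed inputs (the (start, end) pairs are made up); unlike the function above it does not additionally drop spans that start at index 0.

import numpy as np
from itertools import compress

spans = [(0, 3), (0, 1), (2, 3), (5, 7)]   # hypothetical match spans
s = np.array([a for a, _ in spans])
e = np.array([b for _, b in spans])
ok = np.ones(len(spans), dtype=bool)
for start, end in spans:
    # A span is masked out if it sits strictly inside another span.
    is_substr = np.logical_or(np.logical_and(s > start, e <= end),
                              np.logical_and(s >= start, e < end))
    ok = np.logical_and(ok, ~is_substr)
print(list(compress(spans, ok)))           # [(0, 3), (5, 7)]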
Example #53
0
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]
true_selector = [n>5 for n in counts]

print("true_selector:", true_selector)

addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

#  compress(data, selectors) --> iterator over selected data
#  |
#  |  Return data elements corresponding to true selector elements.
#  |  Forms a shorter iterator from selected data elements using the
#  |  selectors to choose the data elements.

new_address = list(compress(addresses, true_selector))
print(new_address)

# The key point here is to first build a Boolean sequence that marks which
# elements satisfy the condition; compress() then yields only the elements at
# the positions where the selector is True. Like filter(), compress() returns
# an iterator, so wrap the result in list() if you need an actual list.
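To make the filter() comparison above concrete, here is a small self-contained check (reusing the addresses and true_selector defined in this example) that compress() with a precomputed mask matches an explicit zip-based filter:

from itertools import compress

masked = list(compress(addresses, true_selector))
zipped = [addr for addr, keep in zip(addresses, true_selector) if keep]
assert masked == zipped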
Example #54
0
def ParseReviews(review_url):
    review_url = review_url + "&sortBy=recent"  # change it to sort by dates
    # the URL is a string starting with "/gp/...." that is a product from parsing a booklist
    page = download(
        "https://www.amazon.com" + review_url +
        "&pageNumber=1")  # this is the review page, not the product page

    ASIN = review_url[20:30]

    product_url = "https://www.amazon.com/dp/product/" + ASIN
    product_site = download(product_url)

    parser = html.fromstring(page.text)  # full review
    tree = html.fromstring(product_site.content)  # product page

    # publishing date
    XPATH_CONTENT = '//div[@class="content"]/div'

    book_info = tree.xpath(XPATH_CONTENT)

    publish_date = ''
    publisher = ''
    for cont in book_info:
        t_cont = cont.xpath('//li//text()')
        for x in t_cont:  #./div//div//i//text()
            temp_search = re.findall(r'\w+ \d+, \d{4}', x)
            if (
                    len(temp_search) == 1
            ):  # length represents the number of strings found that matches this pattern
                publish_date = temp_search
                publisher = re.split(r'[;\(]', x)[0]

    # Sales Ranking
    XPATH_RANK = '//li[@id="SalesRank"]/text()'
    rank_contained = tree.xpath(XPATH_RANK)

    sales_rank = int(re.findall(r'\d+', rank_contained[1])[0])

    # Book name
    XPATH_PRODUCT_NAME = '//h1//span[@id="productTitle"]//text()'
    raw_product_name = tree.xpath(XPATH_PRODUCT_NAME)
    product_name = ''.join(raw_product_name).strip()

    # Number of reviews
    xpath_numberReviews = '//span[@class="a-size-base"][@id="acrCustomerReviewText"]//text()'
    raw_n_reviews = tree.xpath(xpath_numberReviews)
    n_reviews = int(re.findall(r'\d+', raw_n_reviews[0])[0])

    # Average book rating from all reviews
    xpath_rating = '//a//i[@class="a-icon a-icon-star a-star-4-5"]//span[@class="a-icon-alt"]//text()'
    raw_rating = tree.xpath(xpath_rating)[0]
    agg_rating = float(re.findall(r'[^ab-z]+ ', raw_rating)[0])

    # rating dictionary of all review breakup percentages
    XPATH_AGGREGATE_RATING = '//table[@id="histogramTable"]//tr'
    total_ratings = tree.xpath(XPATH_AGGREGATE_RATING)

    ratings_dict = {}

    # grabbing the rating section in the product page
    for ratings in total_ratings:
        extracted_rating = ratings.xpath('./td//a//text()')
        if extracted_rating:
            rating_key = extracted_rating[0]
            raw_raing_value = extracted_rating[1]
            rating_value = raw_raing_value
            if rating_key:
                ratings_dict.update({rating_key: rating_value})

    # find the maximum page number for all the reviews
    t_review_links = parser.xpath('//a/@href')
    index_link = list(
        map(lambda x: re.search(r'pageNumber=', x) != None, t_review_links))  #

    page_links = list(compress(t_review_links, index_link))
    page_N = int(re.findall(r'\d+',
                            page_links[-2])[-1])  # number for the total pages

    #page_N = int(re.findall(r'\d+$',page_links[-2])[0]) # number for the total pages
    page_N = min(500, page_N)  # reduce the maximum number of pages

    # first parse the first page, or full review page 1
    XPATH_REVIEW_TEXT = '//span[@class="a-size-base review-text"][contains(@data-hook,"review-body")]//text()'
    reviews_temp = parser.xpath(XPATH_REVIEW_TEXT)

    # also parse the review header in the first page
    XPATH_REVIEW_HEADER = '//a[@data-hook="review-title"]//text()'
    reviews_title = parser.xpath(XPATH_REVIEW_HEADER)

    # then add the rest of the review pages
    for page_n in range(2, page_N + 1, 1):  # USE PAGE_N found above
        temp_url = 'https://www.amazon.com' + review_url + '&pageNumber=' + str(
            page_n)
        temp_page = download(temp_url)
        temp_parser = html.fromstring(temp_page.text)
        reviews_temp += temp_parser.xpath(XPATH_REVIEW_TEXT)
        reviews_title += temp_parser.xpath(XPATH_REVIEW_HEADER)

    data = {
        'url': product_url,
        'name': product_name,
        'ASIN': ASIN,
        'publisher': publisher,
        'publish_date': publish_date,
        'sales_rank': sales_rank,
        'number_reviews': n_reviews,
        'average_rating': agg_rating,
        'rating_perc': ratings_dict,
        'reviews': reviews_temp,
        'review_titles': reviews_title
    }
    return data
Example #55
0
    def pack_fgd(self, vmf: VMF, fgd: FGD) -> None:
        """Analyse the map to pack files. We use the FGD to easily handle this."""
        # Don't show the same keyvalue warning twice, it's just noise.
        unknown_keys = set()

        # Definitions for the common keyvalues on all entities.
        try:
            base_entity = fgd['_CBaseEntity_']
        except KeyError:
            LOGGER.warning('No CBaseEntity definition!')
            base_entity = EntityDef(EntityTypes.BASE)

        for ent in vmf.entities:
            # Allow opting out packing specific entities.
            if conv_bool(ent.keys.pop('srctools_nopack', '')):
                continue

            classname = ent['classname']
            try:
                ent_class = fgd[classname]
            except KeyError:
                if classname not in unknown_keys:
                    LOGGER.warning('Unknown class "{}"!', classname)
                    unknown_keys.add(classname)
                # Fall back to generic keyvalues.
                ent_class = base_entity

            if ent['skinset'] != '':
                # Special key for us - if set this is a list of skins this
                # entity is pledging it will restrict itself to.
                skinset = {int(x)
                           for x in ent.keys.pop('skinset').split()
                           }  # type: Optional[Set[int]]
            else:
                skinset = None

            value: str

            for key in set(ent.keys) | set(ent_class.kv):
                # These are always present on entities, and we don't have to do
                # any packing for them.
                # Origin/angles might be set (brushes, instances) even for ents
                # that don't use them.
                if key in ('classname', 'hammerid', 'origin', 'angles', 'skin',
                           'pitch', 'skinset'):
                    continue
                elif key == 'model':
                    # Models are set on all brush entities, and are always either
                    # a '*37' brush ref, a model, or a sprite.
                    value = ent[key]
                    if value and value[:1] != '*':
                        self.pack_file(value, skinset=skinset)
                    continue
                try:
                    kv = ent_class.kv[key]  # type: KeyValues
                    val_type = kv.type
                    default = kv.default
                except KeyError:
                    # Suppress this error for unknown classes, we already
                    # showed a warning above.
                    if ent_class is not base_entity and (
                            classname, key) not in unknown_keys:
                        unknown_keys.add((ent_class.classname, key))
                        LOGGER.warning(
                            'Unknown keyvalue "{}" for ent of type "{}"!', key,
                            ent['classname'])
                    continue

                value = ent[key, default]

                # Ignore blank values, they're not useful.
                if not value:
                    continue

                if val_type is KVTypes.STR_MATERIAL:
                    self.pack_file(value, FileType.MATERIAL)
                elif val_type is KVTypes.STR_MODEL:
                    self.pack_file(value, FileType.MODEL)
                elif val_type is KVTypes.EXT_STR_TEXTURE:
                    self.pack_file(value, FileType.TEXTURE)
                elif val_type is KVTypes.STR_VSCRIPT:
                    for script in value.split():
                        self.pack_file('scripts/vscripts/' + script)
                elif val_type is KVTypes.STR_VSCRIPT_SINGLE:
                    self.pack_file('scripts/vscripts/' + value)
                elif val_type is KVTypes.STR_SPRITE:
                    if not value.casefold().startswith('sprites/'):
                        value = 'sprites/' + value
                    if not value.casefold().startswith('materials/'):
                        value = 'materials/' + value

                    self.pack_file(value, FileType.MATERIAL)
                elif val_type is KVTypes.STR_SOUND:
                    self.pack_soundscript(value)

        # Handle resources that are coded into different entities with our
        # internal database.
        # Use compress() to skip classnames that have no ents.
        for classname in itertools.compress(vmf.by_class.keys(),
                                            vmf.by_class.values()):
            try:
                res = CLASS_RESOURCES[classname]
            except KeyError:
                continue
            if callable(res):
                # Different stuff is packed based on keyvalues, so call a function.
                for ent in vmf.by_class[classname]:
                    res(self, ent)
            else:
                # Basic dependencies, if they're the same for any copy of this ent.
                for file, filetype in res:
                    self.pack_file(file, filetype)

        # Handle worldspawn here - this is fairly special.
        sky_name = vmf.spawn['skyname']
        for suffix in ['bk', 'dn', 'ft', 'lf', 'rt', 'up']:
            self.pack_file(
                'materials/skybox/{}{}.vmt'.format(sky_name, suffix),
                FileType.MATERIAL,
            )
            self.pack_file(
                'materials/skybox/{}{}_hdr.vmt'.format(sky_name, suffix),
                FileType.MATERIAL,
                optional=True,
            )
        self.pack_file(vmf.spawn['detailmaterial'], FileType.MATERIAL)

        detail_script = vmf.spawn['detailvbsp']
        if detail_script:
            self.pack_file(detail_script, FileType.GENERIC)
            try:
                with self.fsys:
                    detail_props = self.fsys.read_prop(detail_script, 'ansi')
            except FileNotFoundError:
                LOGGER.warning('detail.vbsp file does not exist: "{}"',
                               detail_script)
            except Exception:
                LOGGER.warning('Could not parse detail.vbsp file: ',
                               exc_info=True)
            else:
                # We only need to worry about models, the sprites are a single
                # sheet packed above.
                for prop in detail_props.iter_tree():
                    if prop.name == 'model':
                        self.pack_file(prop.value, FileType.MODEL)
Example #56
0
def printout(prefix, path, src, trg, outdir, origoutdir, cleanorigoutdir, garbageoutdir,
             tokoutdir, cleantokoutdir, morphtokoutdir, cdectokoutdir, cdectoklcoutdir,
             agiletokoutdir, agiletoklcoutdir, morphoutdir, posoutdir,
             agiletokpath, cdectokpath, cleanpath, docdec,
             stp=lputil.selected_translation_pairs, el=lputil.extract_lines,
             tweet=False, swap=False):
    ''' Find files and print them out '''
    src_man_fh = open(os.path.join(outdir, "%s.%s.manifest" % (prefix, src)), 'w')
    trg_man_fh = open(os.path.join(outdir, "%s.%s.manifest" % (prefix, trg)), 'w')

    # open a bunch of file handles
    # third element indicates whether it should actually be opened or if the file should be simply named
    namedirpairs = [('orig', origoutdir, True),
                    ('cleanorig', cleanorigoutdir, False),
                    ('tok', tokoutdir, True),
                    ('cleantok', cleantokoutdir, False),
                    ('morphtok', morphtokoutdir, True),
                    ('cdectok', cdectokoutdir, False),
                    ('cdectoklc', cdectoklcoutdir, False),
                    ('agiletok', agiletokoutdir, False),
                    ('agiletoklc', agiletoklcoutdir, False),
                    ('morph', morphoutdir, True),
                    ('pos', posoutdir, True),
                    ]
    outfiles = dd(dict)
    for sidename, side in (('src', src),
                           ('trg', trg)):
        for dirname, dirval, doopen in namedirpairs:
            entry = os.path.join(outdir, dirval, "{}.{}.{}.flat".format(prefix, dirval, side))
            if doopen:
                entry = open(entry, 'w')
            outfiles[sidename][dirname] = entry

    garbagefhs = {}
    garbagedisabled = True
    if garbageoutdir is not None:
        garbagedisabled = False
        src_orig_garbage_fh = open(os.path.join(outdir, garbageoutdir, "%s.%s.flat" % \
                                                (prefix, src)), 'w')
        garbagefhs[outfiles['src']['orig']] = src_orig_garbage_fh
        trg_orig_garbage_fh = open(os.path.join(outdir, garbageoutdir, "%s.%s.flat" % \
                                                (prefix, trg)), 'w')
        garbagefhs[outfiles['trg']['orig']] = trg_orig_garbage_fh
        src_garbage_man_fh = open(os.path.join(outdir, garbageoutdir, "%s.%s.manifest" % (prefix, src)), 'w')
        garbagefhs[src_man_fh] = src_garbage_man_fh
        trg_garbage_man_fh = open(os.path.join(outdir, garbageoutdir, "%s.%s.manifest" % (prefix, trg)), 'w')
        garbagefhs[trg_man_fh] = trg_garbage_man_fh

    (stpsrc, stptrg) = (trg, src) if swap else (src, trg)
    for m in stp(path, src=stpsrc, trg=stptrg, xml=True, tweet=tweet):
        sdata, tdata = el(*m)

        # found data sometimes seems to require swap behavior
        if swap:
            sdata, tdata = tdata, sdata

        if sdata is None or tdata is None:
            sys.stderr.write("Warning: empty files:\n%s or %s\n" % (m[0], m[1]))
            continue
        # Strict rejection of different length lines. If these are desired,
        # do gale & church or brown et al or something similar here
        slen = len(sdata["ORIG"])
        tlen = len(tdata["ORIG"])
        # print(slen,tlen)
        if slen != tlen:
            sys.stderr.write("Warning: different number of lines in files:\n" \
                             "%s %d\n%s %d\n" % (m[0], slen, m[1], tlen))
            continue

        # Filter out control code-bearing lines here; mask out the data from all
        # fields (see the mask/inverse-mask sketch after this function).
        garbagemask = lputil.getgarbagemask(sdata["ORIG"], tdata["ORIG"], disabled=garbagedisabled)

        goodmask = [not x for x in garbagemask]
        ### Write original
        for fh, data in zip((outfiles['src']['orig'], outfiles['trg']['orig']), (sdata["ORIG"], tdata["ORIG"])):
            for line in compress(data, garbagemask):
                fh.write(line)
            ### Write garbage original
            if not garbagedisabled:
                for line in compress(data, goodmask):
                    garbagefhs[fh].write(line)

        ### Write manifest

        try:
            for fh, fname, tupgen in zip((src_man_fh, trg_man_fh), (m[0], m[1]),
                                         (list(zip(sdata["DOCID"], sdata["SEGID"],
                                                   sdata["START"], sdata["END"])),
                                          list(zip(tdata["DOCID"], tdata["SEGID"],
                                                   tdata["START"], tdata["END"])))):
                for tup in compress(tupgen, garbagemask):
                    fh.write("\t".join(map(str, (fname,) + tup)) + "\n")
                if not garbagedisabled:
                    for tup in compress(tupgen, goodmask):
                        garbagefhs[fh].write("\t".join(map(str, (fname,) + tup)) + "\n")
        except:
            sys.stderr.write(src_man_fh.name)
            # sys.stderr.write(fname)
            raise

        ### Write tokenized, morph tokenized, pos tag

        zipset = zip(
            ((outfiles["src"]["tok"], outfiles["src"]["morphtok"], outfiles["src"]["morph"], outfiles["src"]["pos"]),
             (outfiles["trg"]["tok"], outfiles["trg"]["morphtok"], outfiles["trg"]["morph"], outfiles["trg"]["pos"])),
            (sdata, tdata))

        for fhset, data in zipset:
            for fh, field in zip(fhset, ("TOK", "MORPHTOK", "MORPH", "POS")):
                for line in compress(data[field], garbagemask):
                    fh.write(line)

    # raw orig->clean orig
    # raw tok->clean tok
    # run agile tokenizer on target orig
    # TODO: lowercase

    outfiles['src']['orig'].close()
    for side in ('src', 'trg'):
        for contents in ('orig', 'tok'):
            outfiles[side][contents].close()
            cleancmd = "{cmd} {infile} {outfile}".format(cmd=cleanpath, infile=outfiles[side][contents].name,
                                                         outfile=outfiles[side]["clean{}".format(contents)])
            sys.stderr.write(cleancmd + "\n")
            try:
                check_call(shlex.split(cleancmd))
            except CalledProcessError as e:
                sys.stderr.write("Error code %d running %s\n" % (e.returncode, e.cmd))
                sys.exit(1)
    agiletok_cmd = "%s -i %s -o %s -t %s " % (
    agiletokpath, outfiles['trg']['cleanorig'], outfiles["trg"]["agiletoklc"], outfiles["trg"]["agiletok"])
    sys.stderr.write(agiletok_cmd + "\n")
    try:
        check_call(shlex.split(agiletok_cmd))
    except CalledProcessError as e:
        sys.stderr.write("Error code %d running %s\n" % (e.returncode, e.cmd))
        sys.exit(1)
    # run cdec tokenizer on source orig

    if docdec:
        cdectok_cmd = "%s -i %s -o %s -t %s " % (
        cdectokpath, outfiles['src']['cleanorig'], outfiles["src"]["cdectoklc"], outfiles["src"]["cdectok"])
        sys.stderr.write(cdectok_cmd + "\n")
        try:
            check_call(shlex.split(cdectok_cmd))
        except CalledProcessError as e:
            sys.stderr.write("Error code %d running %s\n" % (e.returncode, e.cmd))
            sys.exit(1)
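The garbagemask split used above follows a simple pattern: one Boolean mask routes accepted lines to the normal outputs and its negation routes the rest to the garbage outputs. Below is a tiny sketch with made-up data and a simplistic stand-in for lputil.getgarbagemask; the inverse mask is named goodmask only to mirror the original code, even though it selects the rejected lines.

from itertools import compress

lines = ["good line\n", "\x07 control junk\n", "another good line\n"]
# Stand-in predicate: reject lines containing low control codes.
garbagemask = [not any(ord(ch) < 9 for ch in line) for line in lines]
goodmask = [not x for x in garbagemask]

kept = list(compress(lines, garbagemask))    # would go to the normal .flat files
rejected = list(compress(lines, goodmask))   # would go under garbageoutdir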
Example #57
0
import itertools as it
# By default the next element is computed as a running sum
for e in it.accumulate(range(6)):
    print(e, end=', ')  # 0, 1, 3, 6, 10, 15
print('\n---------')
# Use x * y to compute the next element of the iterator
for e in it.accumulate(range(1, 6), lambda x, y: x * y):
    print(e, end=', ')  # 1, 2, 6, 24, 120
print('\n---------')
# "Chain" two sequences together to produce a new iterator
for e in it.chain(['a', 'b'], ['Kotlin', 'Swift']):
    print(e, end=', ')  # 'a', 'b', 'Kotlin', 'Swift'
print('\n---------')
# Filter the elements of the first sequence according to the second one;
# since only the middle two selector values are 1 (True), only the middle two elements are kept
for e in it.compress(['a', 'b', 'Kotlin', 'Swift'], [0, 1, 1, 0]):
    print(e, end=', ')  # only: 'b', 'Kotlin'
print('\n---------')
# Keep everything from the first element whose length is at least 4 through the end
for e in it.dropwhile(lambda x: len(x) < 4, ['a', 'b', 'Kotlin', 'x', 'y']):
    print(e, end=', ')  # only: 'Kotlin', 'x', 'y'
print('\n---------')
# Drop everything from the first element whose length is at least 4 onward, keeping the prefix
for e in it.takewhile(lambda x: len(x) < 4, ['a', 'b', 'Kotlin', 'x', 'y']):
    print(e, end=', ')  # only: 'a', 'b'
print('\n---------')
# Keep only the elements whose length is at least 4
for e in it.filterfalse(lambda x: len(x) < 4, ['a', 'b', 'Kotlin', 'x', 'y']):
    print(e, end=', ')  # only: 'Kotlin'
print('\n---------')
# Apply the pow function to the original elements and use the results as the new sequence
Example #58
0
with assert_raises(StopIteration):
    next(it)

it = itertools.accumulate([3, 2, 4, 1, 0, 5, 8], lambda a, v: a * v)
assert 3 == next(it)
assert 6 == next(it)
assert 24 == next(it)
assert 24 == next(it)
assert 0 == next(it)
assert 0 == next(it)
assert 0 == next(it)
with assert_raises(StopIteration):
    next(it)

# itertools.compress
assert list(itertools.compress("ABCDEF", [1, 0, 1, 0, 1, 1])) == list("ACEF")
assert list(itertools.compress("ABCDEF", [0, 0, 0, 0, 0, 0])) == list("")
assert list(itertools.compress("ABCDEF", [1, 1, 1, 1, 1, 1])) == list("ABCDEF")
assert list(itertools.compress("ABCDEF", [1, 0, 1])) == list("AC")
assert list(itertools.compress("ABC", [0, 1, 1, 1, 1, 1])) == list("BC")
assert list(itertools.compress("ABCDEF",
                               [True, False, "t", "", 1, 9])) == list("ACEF")

# itertools.tee
t = itertools.tee([])
assert len(t) == 2
assert t[0] is not t[1]
assert list(t[0]) == list(t[1]) == []

with assert_raises(TypeError):
    itertools.tee()
Example #59
0
        xs, err_xs = excess_variance(segment['RATE'].values,
                                     segment['ERROR'].values,
                                     normalized=False)
        xs_arr.append(xs)
        xs_err_arr.append(err_xs)

        i = i + M

    mask_negative = []
    for el in xs_arr:
        if el < 0:
            mask_negative.append(False)
        else:
            mask_negative.append(True)

    xs_arr = list(compress(xs_arr, mask_negative))
    xs_err_arr = list(compress(xs_err_arr, mask_negative))
    mean_time_nonneg = list(compress(mean_time, mask_negative))

    axs[2].errorbar(mean_time_nonneg,
                    xs_arr,
                    xs_err_arr,
                    color='black',
                    marker='.',
                    linestyle='',
                    ecolor='gray')
    axs[2].grid()
    axs[2].set_ylabel(r'$\sigma_{XS}^2$', fontsize=10)

    #Subplot mean excess variance
    df_mean_xs = pd.DataFrame({
Example #60
0
result = itertools.islice(range(10), 1, 5, 2)
for item in result:
    print(item)

#example for islice

with open("sample.txt", "r") as f:
    header = itertools.islice(f, 3)

    for line in header:
        print(line, end="")

# itertools.compress (similar to filter; with filter we supply a function that
# decides True or False, here we supply the Boolean selectors directly)
letters = ['a', 'b', 'c']  # sample data, assumed here; the original snippet defines letters earlier
selectors = [True, False, True]
result1 = itertools.compress(letters, selectors)
print(list(result1))

#filter
even = filter(lambda x: x % 2 == 0, range(10))
print(list(even))

#itertools.filterfalse:
odd = itertools.filterfalse(lambda x: x % 2 == 0, range(10))
print(list(odd))


def lt_2(n):
    if n < 2:
        return True
    return False