def __init__(self):
    self.all_nb_paxos_nodes = oset()
    self.all_nb_clients = oset()
    self.all_req_size = oset()
    self.all_nb_iter = oset()
    self.all_leader_acceptor = set()
    self.all_channel_size = oset()
    self.results = {}
 def __init__(self, nb_summaries):
    self.nb_summaries_to_take = nb_summaries
    self.all_nb_paxos_nodes = oset()
    self.all_nb_clients = oset()
    self.all_req_size = oset()
    self.all_nb_iter = oset()
    self.all_leader_acceptor = set()
    self.all_channel_size = oset()
    self.results = {}
Example #3
    def __init__(self, name, inputs=None, outputs=None, listing=None):
        self.name = name
        # Avoid a shared mutable default argument for the listing.
        self.listing = listing if listing is not None else []
        self.free_symbols = lang.freesym(self.listing)
        names = set([x.name[:x.name.find('[')] for x in self.free_symbols])
        # self.inputs= [ x for x in popcorn_globals.registered_inputs
        #               if x.name in names ]
        # self.outputs= [ x for x in popcorn_globals.registered_outputs
        #               if x.name in names ]
        self.inputs = [
            popcorn_globals.registered_inputs[n] for n in names
            if n in popcorn_globals.registered_inputs
        ]
        self.outputs = [
            popcorn_globals.registered_outputs[n] for n in names
            if n in popcorn_globals.registered_outputs
        ]
        # Make a list of all of the DOfSpaces and then order them
        S = oset()
        for i in self.inputs:
            S.add(i.dspace)
        for o in self.outputs:
            for d in o.dspaces:
                S.add(d)
        dic = OrderedDict()
        for i, d in enumerate(S):
            dic[d] = i
        self.spaces = dic

        popcorn_globals.registered_kernels.add(self)
Example #4
   def __init__(self, name, filename):
      self.name = name
      self.filename = filename
      self.all_msg_size = oset()
      self.results = {}

      self.populate_results()
Example #5
   def __init__(self, name, filename, thr_or_lat = ""):
      self.name = name
      self.filename = filename
      self.thr_or_lat = thr_or_lat
      self.all_keys = oset()
      self.results = {}

      self.populate_results()
Example #6
    def __init__(self, G, root):
        self._G = G
        tovisit = set(G.nodes())
        tovisit.remove(root)
        # table = UnboundedTable()
        width = 1
        table = [[root]]
        while tovisit:
            # get last row
            last = table[-1]
            # oset for unique and ordered hosts
            # nbrs = oset()
            nbrs = []
            # get all non-visited neighbors
            for n in last:
                if n is None:  # skip padding cells left by earlier widening
                    continue
                nbrs += [x for x in nx.neighbors(G, n) if x in tovisit]
            nbrs = oset(nbrs)
            tovisit -= nbrs
            # if there are no more nbrs, we're done
            if not nbrs:
                break

            # increase table size
            width = max(len(nbrs), width)
            # indices of the occupied (non-None) cells in the last row
            itms = [i for i, itm in enumerate(last) if itm is not None]
            # TODO: more pythonic rolling average
            # rolling avg(A, n) = A*n + t / n + 1
            # avg = sum((i for i,itm in enumerate(last) 
                # if i is not None))
            avg = sum(itms)//len(itms)

            # simulate unbounded table
            # there's likely a more pythonic way of doing this
            # expand previous rows
            for i, row in enumerate(table):
                if len(row) < width:
                    diff = width - len(row)
                    table[i] = ([None]*(diff//2)
                            + row
                            + [None]*(diff-diff//2)
                        )
            table.append([None]*width)
            newrow = table[-1]
            # add them to the table near their parents
            for offset, nbr in enumerate(nbrs):
                newrow[offset-avg//2] = nbr

        # iterate over the table, recording each node's (column, row) position
        for i, row in enumerate(table):
            for j, itm in enumerate(row):
                if itm is not None:
                    self[itm] = (j, i)
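The layout logic above relies on oset to deduplicate each breadth-first layer while keeping the order in which neighbors are discovered. A minimal self-contained sketch of that layer-by-layer pattern, assuming networkx and the oset package are installed (the graph used here is purely illustrative):

import networkx as nx
from oset import oset

G = nx.petersen_graph()
root = 0
tovisit = set(G.nodes())
tovisit.remove(root)

layer = [root]
while tovisit:
    # Unique, order-preserving collection of not-yet-visited neighbors of the
    # current layer -- the same role oset plays in the snippet above.
    nbrs = oset([x for n in layer for x in nx.neighbors(G, n) if x in tovisit])
    if not nbrs:
        break
    tovisit.difference_update(nbrs)
    print("next layer:", list(nbrs))
    layer = list(nbrs)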
Example #7
    def get_url_list_from_csv(self):
        filename = "/home/user/python_projects/Amazon/Amazon_PDP_URLs1.csv"
        skus1 = []

        with open(filename, 'r') as csvfile:
            csvreader = csv.reader(csvfile)
            #fields = csvreader.next()
            for row in csvreader:
                skus1.append(row[0])
        skus1.pop(0)
        skus1 = list(oset.oset(skus1))
        return skus1
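The `list(oset.oset(skus1))` step deduplicates the SKUs while preserving the order in which they appear in the CSV, which a plain set would not guarantee. A small illustration of that difference (the SKU values are made up):

from oset import oset

skus = ["B01", "B02", "B01", "B03", "B02"]
print(list(oset(skus)))  # ['B01', 'B02', 'B03'] -- first-seen order kept
print(list(set(skus)))   # same elements, but iteration order is arbitrary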
Example #8
 def __init__(self):
    self.all_nb_nodes = oset()
    self.all_msg_size = oset()
    self.all_chkpt_size = oset()
    self.all_nb_iter = oset()
    self.all_channel_size = oset()
    self.all_limit_thr = oset()
    self.results = {}
Example #9
 def __init__(self, nb_summaries):
    self.nb_summaries_to_take = nb_summaries
    self.all_nb_nodes = oset()
    self.all_msg_size = oset()
    self.all_chkpt_size = oset()
    self.all_nb_iter = oset()
    self.all_channel_size = oset()
    self.all_limit_thr = oset()
    self.results = {}
Example #10
        def open_templates(*templates):

            for t in templates:
                self.parser.read(t.configuration, update=False)
                cache.add(t)

                try:
                    include = self.parser['params'].pop('include')
                    include = filter(None, oset(
                        include.replace(' ', '').split(',')))
                    requirements = filter(  # noqa
                        lambda t: t not in cache, to_template(include))
                    for tt in open_templates(*requirements):
                        yield tt
                except KeyError:
                    pass

                yield t
Example #11
 def __init__(self, G, root):
     self[root] = (0.5, 1)
     tovisit = set(G.nodes())
     tovisit.remove(root)
     layers = [[root]]
     mid = self[root][0]
     while tovisit:
         top = layers[-1]
         # assemble next layer from top neighbors
         # this also sorts neighbors by connections to the top layer
         nbrs = []
         for n in top:
             nbrs += (i for i in nx.neighbors(G,n) if i in tovisit)
         # remove duplicates
         nbrs = oset(nbrs)
         # if we're out of neighbors, we're done
         if not nbrs:
             break
         # y distance is based on the layer sizes, and uses a logistic function
         ydist = -1.5/(1+math.exp(-4.5*abs(len(top)-len(nbrs))-1))
         xsize = 0
         xinc = 0
         if len(nbrs) > 1:
             xsize = 2*len(nbrs)
             xinc = xsize/(len(nbrs)-1)
         # placement:
         xcur = mid - xsize/2
         # get last y line from first element of top layer
         y = self[top[0]][1] - ydist
         newlayer = []
         for i, n in enumerate(nbrs):
             # alternating bumping to prevent horizontal collision
             if i%2==0:
                 y += 0.1*ydist
             else:
                 y -= 0.1*ydist
             tovisit.remove(n)
             newlayer.append(n)
             self[n] = (xcur, y)
             xcur += xinc
         layers.append(newlayer)
     transformspace(self)
Example #12
    def _ordered_merge(self, fresh_set, cached_list=None, new_index=0):
        """Returns list with new items prepended, and index of first old item.
        """
        if cached_list == None:
            return list(fresh_set), set(), 0

        cached_set = set(cached_list)
        if fresh_set == cached_set:
            return cached_list, set(), new_index

        new_set = fresh_set - cached_set
        old_set = cached_set - fresh_set

        if new_index:
            # shrink new_index by the number of removed ids that sat before it
            new_index = len(set(cached_list[:new_index]) - old_set)

        results = oset(cached_list) - old_set

        # ids, old_ids, new_index
        return list(new_set) + list(results), old_set, len(new_set) + new_index
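A minimal sketch of what the merge produces, treating the method as a standalone function for illustration (the ids are invented; note that the relative order of the brand-new ids depends on set iteration order):

from oset import oset

def ordered_merge(fresh_set, cached_list=None, new_index=0):
    # Same logic as _ordered_merge above, minus the self parameter.
    if cached_list is None:
        return list(fresh_set), set(), 0
    cached_set = set(cached_list)
    if fresh_set == cached_set:
        return cached_list, set(), new_index
    new_set = fresh_set - cached_set
    old_set = cached_set - fresh_set
    if new_index:
        new_index = len(set(cached_list[:new_index]) - old_set)
    results = oset(cached_list) - old_set
    return list(new_set) + list(results), old_set, len(new_set) + new_index

ids, removed, idx = ordered_merge({1, 2, 3, 4}, cached_list=[2, 5, 3], new_index=2)
print(ids)      # new ids 1 and 4 (set order), then surviving cached ids 2, 3
print(removed)  # {5}
print(idx)      # 2 new ids + the 1 surviving cached id before the old index -> 3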
Example #13
 def __init__(self):
     xml.sax.ContentHandler.__init__(self)
     self.results = list()
     self.elementStack = list()
     self.columnNames = oset()
Example #14
def computeWeightedAverageKsatClaySandSilt(soilAttrTuple):
    """ Computes weighted average for Ksat, %clay/silt/sand for a SSURGO mukey based on values
        for each component in the mukey; weights based on component.comppct_r.
    
        @param soilAttrTuple Tuple returned from getParentMatKsatTexturePercentClaySiltSandForComponentsInMUKEYs
    
        @return Tuple containing: (1) a list containing column names; (2) a list of lists containing averaged soil properties for each mukey
    """
    data = list()
    representativeComponentDict = dict()
    derivedSet = oset()
    idx = 0
    
    # Convert numbers as text to numbers
    for row in soilAttrTuple[1]:
        mukey = int(row[0])
        comppct_r = int(row[2])
        try:
            maxRepComp = representativeComponentDict[mukey][1]
            if maxRepComp < comppct_r:
                representativeComponentDict[mukey] = (idx, comppct_r)
        except KeyError:
            representativeComponentDict[mukey] = (idx, comppct_r)
        try:
            hzdept_r = float(row[7])
        except ValueError:
            hzdept_r = -1
        try:
            ksat_r = float(row[8])
        except ValueError:
            ksat_r = -1
        try:
            claytotal_r = float(row[9])
        except ValueError:
            claytotal_r = -1
        try:
            silttotal_r = float(row[10])
        except ValueError:
            silttotal_r = -1
        try:
            sandtotal_r = float(row[11])
        except ValueError:
            sandtotal_r = -1
        try:
            wsatiated_r = float(row[12]) # a.k.a. porosity
        except ValueError:
            wsatiated_r = -1
        try:
            wthirdbar_r = float(row[13]) # a.k.a. field capacity
        except ValueError:
            wthirdbar_r = -1
        try:
            awc_r = float(row[14]) # a.k.a. plant available water capacity
        except ValueError:
            awc_r = -1
    
        data.append([mukey, row[1], comppct_r, row[3], row[4], row[5], row[6], hzdept_r, 
                     ksat_r, claytotal_r, silttotal_r, sandtotal_r, wsatiated_r,
                     wthirdbar_r, awc_r])
        idx = idx + 1

    mukeyCol = [row[0] for row in data]
    comppctCol = [row[2] for row in data]
    ksatCol = [row[8] for row in data]
    clayCol = [row[9] for row in data]
    siltCol = [row[10] for row in data]
    sandCol = [row[11] for row in data]
    porosityCol = [row[12] for row in data]
    fieldCapCol = [row[13] for row in data]
    availWaterCapCol = [row[14] for row in data]

    # Put values into Numpy 2-D array    
    npdata = np.array([mukeyCol, comppctCol, ksatCol, clayCol, siltCol, sandCol, 
                       porosityCol, fieldCapCol, availWaterCapCol]).transpose()
    # Remove duplicate rows 
    #   (which will arise because there can be multiple parent material groups for a given component)
    npdata = np.array([np.array(x) for x in set(tuple(x) for x in npdata)])
    # Register NoData values
    npdata = np.ma.masked_where(npdata == -1, npdata)

    # Calculate weighted average using component.comppct_r as weights
    avgSoilAttr = list()
    mukeySet = set(mukeyCol)
    for mukey in mukeySet:
        mySubSet = npdata[npdata[:,0] == mukey]
        myComppct = mySubSet[:,1]
        myKsat = mySubSet[:,2]
        myClay = mySubSet[:,3]
        mySilt = mySubSet[:,4]
        mySand = mySubSet[:,5]
        myPorosity = mySubSet[:,6]
        myFieldCap = mySubSet[:,7]
        myAvailWaterCap = mySubSet[:,8]
        # Calculate weighted averages, ignoring NoData values
        # These variable names MUST match values in ATTRIBUTE_LIST_NUMERIC
        ksat = np.ma.average(myKsat, weights=myComppct)
        pctClay = np.ma.average(myClay, weights=myComppct)
        pctSilt = np.ma.average(mySilt, weights=myComppct)
        pctSand = np.ma.average(mySand, weights=myComppct)
        porosity = np.ma.average(myPorosity, weights=myComppct)
        fieldCap = np.ma.average(myFieldCap, weights=myComppct)
        avlWatCap = np.ma.average(myAvailWaterCap, weights=myComppct)
        
        # Get modal value for qualitative values (pmgroupname, texture, tecdesc)
        maxRepIdx = representativeComponentDict[mukey][0]
        pmgroupname = data[maxRepIdx][3]
        texture = data[maxRepIdx][4]
        texdesc = data[maxRepIdx][5]
        
        attrList = [mukey, ksat, pctClay, pctSilt, pctSand, porosity, pmgroupname, texture, texdesc,
                    fieldCap, avlWatCap]
        # Generate derived variables
        for attr in list(DERIVED_ATTRIBUTES.keys()):
            derivedAttr = eval( DERIVED_ATTRIBUTES[attr] )
            derivedSet.add(attr)
            attrList.append(derivedAttr) 
        
        avgSoilAttr.append(attrList)
    avgSoilHeaders = list(ATTRIBUTE_LIST)
    avgSoilHeaders.insert(0, 'mukey')
    for derived in derivedSet:
        print(("Computed derived attribute %s = %s" % \
              (derived, DERIVED_ATTRIBUTES[derived]) ))
        avgSoilHeaders.append(derived)
    
    return (avgSoilHeaders, avgSoilAttr)
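The NoData handling above hinges on numpy masked arrays: -1 placeholders are masked out, and np.ma.average then takes the comppct_r-weighted mean over the remaining component values only. A minimal sketch of that pattern with invented numbers:

import numpy as np

# Two components of one mukey: weights (comppct_r) and ksat values,
# where -1 marks NoData exactly as in the function above.
comppct = np.array([60.0, 40.0])
vals = np.array([9.0, -1.0])
ksat = np.ma.masked_where(vals == -1, vals)

# The masked entry (and its weight) is ignored, so this prints 9.0.
print(np.ma.average(ksat, weights=comppct))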
Example #15
 def __init__(self, thr_or_lat):
    self.name = ""
    self.filename = ""
    self.thr_or_lat = thr_or_lat
    self.all_keys = oset()
    self.results = {}
Example #16
def computeWeightedAverageKsatClaySandSilt(soilAttrTuple):
    """ Computes weighted average for Ksat, %clay/silt/sand for a SSURGO mukey based on values
        for each component in the mukey; weights based on component.comppct_r.
    
        @param soilAttrTuple Tuple returned from getParentMatKsatTexturePercentClaySiltSandForComponentsInMUKEYs
    
        @return Tuple containing: (1) a list containing column names; (2) a list of lists containing averaged soil properties for each mukey
    """
    data = list()
    representativeComponentDict = dict()
    derivedSet = oset()
    idx = 0

    # Convert numbers as text to numbers
    for row in soilAttrTuple[1]:
        mukey = int(row[0])
        comppct_r = int(row[2])
        try:
            maxRepComp = representativeComponentDict[mukey][1]
            if maxRepComp < comppct_r:
                representativeComponentDict[mukey] = (idx, comppct_r)
        except KeyError:
            representativeComponentDict[mukey] = (idx, comppct_r)
        try:
            hzdept_r = float(row[7])
        except ValueError:
            hzdept_r = -1
        try:
            ksat_r = float(row[8])
        except ValueError:
            ksat_r = -1
        try:
            claytotal_r = float(row[9])
        except ValueError:
            claytotal_r = -1
        try:
            silttotal_r = float(row[10])
        except ValueError:
            silttotal_r = -1
        try:
            sandtotal_r = float(row[11])
        except ValueError:
            sandtotal_r = -1
        try:
            wsatiated_r = float(row[12])  # a.k.a. porosity
        except ValueError:
            wsatiated_r = -1
        try:
            wthirdbar_r = float(row[13])  # a.k.a. field capacity
        except ValueError:
            wthirdbar_r = -1
        try:
            awc_r = float(row[14])  # a.k.a. plant available water capacity
        except ValueError:
            awc_r = -1

        data.append(
            [
                mukey,
                row[1],
                comppct_r,
                row[3],
                row[4],
                row[5],
                row[6],
                hzdept_r,
                ksat_r,
                claytotal_r,
                silttotal_r,
                sandtotal_r,
                wsatiated_r,
                wthirdbar_r,
                awc_r,
            ]
        )
        idx = idx + 1

    mukeyCol = [row[0] for row in data]
    comppctCol = [row[2] for row in data]
    ksatCol = [row[8] for row in data]
    clayCol = [row[9] for row in data]
    siltCol = [row[10] for row in data]
    sandCol = [row[11] for row in data]
    porosityCol = [row[12] for row in data]
    fieldCapCol = [row[13] for row in data]
    availWaterCapCol = [row[14] for row in data]

    # Put values into Numpy 2-D array
    npdata = np.array(
        [mukeyCol, comppctCol, ksatCol, clayCol, siltCol, sandCol, porosityCol, fieldCapCol, availWaterCapCol]
    ).transpose()
    # Remove duplicate rows
    #   (which will arise because there can be multiple parent material groups for a given component)
    npdata = np.array([np.array(x) for x in set(tuple(x) for x in npdata)])
    # Register NoData values
    npdata = np.ma.masked_where(npdata == -1, npdata)

    # Calculate weighted average using component.comppct_r as weights
    avgSoilAttr = list()
    mukeySet = set(mukeyCol)
    for mukey in mukeySet:
        mySubSet = npdata[npdata[:, 0] == mukey]
        myComppct = mySubSet[:, 1]
        myKsat = mySubSet[:, 2]
        myClay = mySubSet[:, 3]
        mySilt = mySubSet[:, 4]
        mySand = mySubSet[:, 5]
        myPorosity = mySubSet[:, 6]
        myFieldCap = mySubSet[:, 7]
        myAvailWaterCap = mySubSet[:, 8]
        # Calculate weighted averages, ignoring NoData values
        # These variable names MUST match values in ATTRIBUTE_LIST_NUMERIC
        ksat = np.ma.average(myKsat, weights=myComppct)
        pctClay = np.ma.average(myClay, weights=myComppct)
        pctSilt = np.ma.average(mySilt, weights=myComppct)
        pctSand = np.ma.average(mySand, weights=myComppct)
        porosity = np.ma.average(myPorosity, weights=myComppct)
        fieldCap = np.ma.average(myFieldCap, weights=myComppct)
        avlWatCap = np.ma.average(myAvailWaterCap, weights=myComppct)

        # Get modal value for qualitative values (pmgroupname, texture, tecdesc)
        maxRepIdx = representativeComponentDict[mukey][0]
        pmgroupname = data[maxRepIdx][3]
        texture = data[maxRepIdx][4]
        texdesc = data[maxRepIdx][5]

        attrList = [
            mukey,
            ksat,
            pctClay,
            pctSilt,
            pctSand,
            porosity,
            pmgroupname,
            texture,
            texdesc,
            fieldCap,
            avlWatCap,
        ]
        # Generate derived variables
        for attr in DERIVED_ATTRIBUTES.keys():
            derivedAttr = eval(DERIVED_ATTRIBUTES[attr])
            derivedSet.add(attr)
            attrList.append(derivedAttr)

        avgSoilAttr.append(attrList)
    avgSoilHeaders = list(ATTRIBUTE_LIST)
    avgSoilHeaders.insert(0, "mukey")
    for derived in derivedSet:
        print("Computed derived attribute %s = %s" % (derived, DERIVED_ATTRIBUTES[derived]))
        avgSoilHeaders.append(derived)

    return (avgSoilHeaders, avgSoilAttr)
Example #17
        print("Using single climate station: %s" % (args.climateStation,) )
    climParamFilename = "%s.base" % (args.climateStation,)
    baseFile = os.path.join(paths._CLIM, climParamFilename)
    subs['climate_stations'] = baseFile
    subs['num_climate_stations'] = 1

    climParams = readParameterFile( os.path.join(paths.RHESSYS_CLIM, climParamFilename) )
    climateStationIDStr = "base_station_ID\tdvalue %s" % (climParams['base_station_id'],)
    subs['zone_base_station_ids'] = climateStationIDStr
    subs['zone_num_base_stations'] = 1
else:
    # Use base station raster map
    if args.verbose:
        print("Reading climate stations from raster: %s" % (grassMetadata['basestations_rast'],) )
    # Get list of base station IDs from raster
    rasterIds = oset()
    pipe = grassLib.script.pipe_command('r.stats', flags='licn', input=grassMetadata['basestations_rast'])
    for line in pipe.stdout:
        values = line.strip().split()
        if values[1] != 'NULL':
            rasterIds.add( int(values[0]) )
    # Get base station IDs from base station files
    baseIds = oset()
    baseFiles = {}
    for station in metadata['climate_stations'].split(','):
        climParamFilename = "%s.base" % (station,)
        baseFile = os.path.join( paths._CLIM, climParamFilename )
        climParams = readParameterFile( os.path.join(paths.RHESSYS_CLIM, climParamFilename) )
        id = int(climParams['base_station_id'])
        baseIds.add(id)
        baseFiles[id] = baseFile
Example #18
def difference(df_a: pd.DataFrame,
               df_b: pd.DataFrame,
               arrow: str = '→',
               missing_column="<missing column>",
               empty="<empty>",
               same='',
               show_empty_cols=False,
               show_empty_rows=False,
               renamed=None) -> pd.DataFrame:
    """
    Diffs two data frames by joining them on their indices and returning a DataFrame with cells that show how the two differ

    :param df_a: The initial/first DataFrame, the one that we consider the second as diverging from
    :param df_b: The second DataFrame, one which we consider as diverging from df_a
    :param arrow: The character used to indicate a change in value, e.g. "initial value" + arrow + "final value"
    :param missing_column: The string value used to indicate a column that has been inserted or deleted
    :param empty: The string value used to indicate an empty cell
    :param show_empty_cols: True if every column of the input DataFrames should be printed, even if they are identical
        between DataFrames. Otherwise, ignore such columns
    :param same: What value to show when two cells have the same value. Defaults to '', an empty string. Can also be
        any arbitrary string, or, if it's the string 'value', instead just show what that value is.
    :param show_empty_rows: True if every row of the input DataFrames should be printed, even if they are identical
        between DataFrames. Otherwise, ignore such rows
    :param renamed: A dictionary describing how columns were renamed from df_a to df_b. These renamed columns are then
        mapped to each other for the diff. The keys of this dictionary should be the column names in df_a, and the values
        should be the column names in df_b
    :return: A DataFrame, with the same columns as the input DataFrames, but with each cell showing how the two input
        DataFrames differed.
    """

    # Work on copies so the caller's frames are never mutated
    a = df_a.copy()
    b = df_b.copy()

    # Rename the columns if needed
    if renamed:
        a = a.rename(columns=renamed)

    # Find a set of all columns
    a_cols = oset(a.columns)
    b_cols = oset(b.columns)
    columns = a_cols | b_cols

    # Ensure all DFs have the same columns for better diffing
    for column in columns:
        for df in (a, b):
            if column not in df:
                df[column] = missing_column

    # Join the two data frames and produce a multi-indexed data frame
    merged = pd.concat([a, b], keys=['a', 'b'], join='inner', axis=1)

    # Group the merged data
    groups = merged.groupby(level=1, axis=1)

    # A DF showing the diff values (A->B) in each cell
    diff = groups.apply(
        lambda group: group.iloc[:, 0].fillna(empty).astype(str).str.cat(
            others=group.iloc[:, 1].fillna(empty).astype(str), sep=arrow))

    # A df with cells that are True where the two cells are different and False otherwise
    mask = groups.apply(lambda group: group.iloc[:, 0] != group.iloc[:, 1])

    # The output is A->B if they're different, otherwise NAN
    result = diff.where(cond=mask, other=float("nan"))

    # Now filter out any row that is all NAN
    if not show_empty_rows:
        result = result[result.notnull().any(axis=1)]

    # Then filter out any column that is all NAN
    if not show_empty_cols:
        result = result.loc[:, result.notnull().any(axis=0)]

    if same == 'value':
        # If the user wants to see the values of the cells that are the same, take them from DF a
        result = result.fillna(a)
    else:
        # Otherwise, just fill those cells with the provided values
        result = result.fillna(same)

    return result
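A minimal usage sketch for difference, assuming pandas (a version where column-wise groupby with axis=1 still behaves as the function expects) and the oset package are available; the frames are invented:

import pandas as pd

before = pd.DataFrame({"name": ["ann", "bob"], "age": [30, 41]}, index=[1, 2])
after = pd.DataFrame({"name": ["ann", "bobby"], "age": [30, 41]}, index=[1, 2])

# Only the changed cell survives the default filtering, rendered as "bob→bobby";
# identical rows and columns are dropped.
print(difference(before, after))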
Example #19
            fastqobjects.append(
                FastqObject(temp1[0], temp1[1], temp1[2], temp1[3], temp1[4],
                            temp1[5]))
    print("\tNumber of reads in file: " + str(count))
    print("\tNumber of fastq objects created: " + str(len(fastqobjects)))
    return fastqobjects


a = Fastq_Parser(fastq1)
b = Fastq_Parser(fastq2)
print "R1 reads total: " + str(len(a))
print "R2 reads total: " + str(len(b))
# Convert list outputs from Fastq_Parser for each of the two input fastq files
# into ordered sets, which are much faster to test for membership in than lists (used
# later)
c = oset(a)
d = oset(b)
print "Length set a :" + str(len(c))
print "Length set b :" + str(len(d))


# Creates a list of all kmer headers (minus the #:#:#:# bit at the end of each header)
# from both input kmer files for both input (R1 and R2) fastq files. This list
# may contain duplicates at this point, but they are removed when the output
# list is converted into a set (see below)
def reads_to_remove(kmerfile1, kmerfile2):
    with open(kmerfile1, 'r') as infile:
        headers1 = []
        for line in infile:
            if line.startswith("@"):
                temp1 = line.split(' ')
Example #20
 def append_from_wordlist(self, wordlist):
     self.entries = list(oset(self.entries + wordlist.process()))
Example #21
 def __init__(self):
    self.name = ""
    self.filename = ""
    self.all_msg_size = oset()
    self.results = {}
Example #22
                      str(len(indel_positions)) + '\n')

        print "Creating list of high quality variant positions..."
        hq_positions_list(
            variant_objects, indel_positions
        )  # doesn't need to return anything because this function appends to the global list "hq_positions_list1"...
        print "Total high quality variant positions identified: " + str(
            len(hq_positions_set1))
        infile0.write("Total high quality variant positions identified: " +
                      '\t' + str(len(hq_positions_set1)) + '\n' + '\n')

# Sort hq_positions_list in ascending numerical order:
hq_positions_list1 = sorted(hq_positions_set1)

# Convert hq_positions_list to a set for faster membership testing in list_of_SNPs() function call:
hq_positions_set2 = oset(hq_positions_list1)
# Write positions to output file:
with open(positions_output_file, 'w') as outfile0:
    for i in hq_positions_list1:
        outfile0.write(str(i) + '\n')

# Parse each vcf file and identify possible SNPs using relaxed filtering criteria compared to hq_positions_list() function:
with open(vcf_file_list, 'r') as infile2:
    for line in infile2:
        print "Identifying possible SNPs in file: " + '\t' + line.strip()
        infile0.write("Identifying possible SNPs in file: " + '\t' +
                      line.strip() + '\n')

        variant_objects = parse_vcf(line.strip())
        variant_objects_set = set(variant_objects)
Example #23
 def __init__(self):
     xml.sax.ContentHandler.__init__(self)
     self.results = list()
     self.elementStack = list()
     self.columnNames = oset()
Example #24
def percent_expansions(text):
    references = oset(re.findall('%[(][a-zA-Z_][a-zA-Z0-9_]*[)]s', text))
    return [s.split('(')[1].split(')')[0] for s in references]
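A quick illustration of the extraction: because the regex matches are passed through oset, each %(name)s parameter is reported once, in order of first appearance (the sample string is invented, and the imports used above, re and oset, are assumed to be in scope):

text = "Hello %(user)s, your balance is %(amount)s. Bye, %(user)s."
print(percent_expansions(text))  # ['user', 'amount']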
Example #25
def percent_expansions(text):
    references = oset(param_reference.findall(text))
    return [s.split('(')[1].split(')')[0] for s in references]