Exemplo n.º 1
0
def generate(X, seqType, args):
    """
    Build gapped-pair ("g11") count features and pass them to ``save.datasetSave``.

    Each sequence in ``X`` is first cut to its leading ``args.terminusLength``
    symbols. For every gap size ``g`` in ``1..args.gGap`` the windows returned
    by ``utils.kmers(x, g + 2)`` are grouped by their (first, last) symbols,
    and one count is emitted for every ordered pair of alphabet symbols.
    A row therefore holds ``len(elements)**2 * args.gGap`` counts, ordered by
    gap size and then by ``itertools.product(elements, repeat=2)``.

    # Note-1: args.gGap --> 1, 2, 3
    # Note-2: gGap --> ('X', 'X')

    :param X: iterable of sequences (each must support slicing)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
        feature count of 0 to ``save.datasetSave``
    :param args: object exposing ``gGap`` (largest gap size) and
        ``terminusLength`` (prefix length kept from every sequence)
    :return: None; the feature matrix is handed to ``save.datasetSave``
        together with the feature count and the tag 'fg11'
    """
    from collections import Counter

    elements = utils.sequenceElements(seqType)
    pairs = list(itertools.product(elements, repeat=2))

    T = []
    for x in X:
        x = x[:args.terminusLength]
        t = []
        for gap in range(1, args.gGap + 1):
            # Tally the (first, last) symbols of every window in one pass
            # instead of rescanning all windows once per alphabet pair.
            # NOTE(review): utils.kmers presumably returns every sliding
            # window of the requested length - confirm in utils.
            ends = Counter((v[0], v[-1]) for v in utils.kmers(x, gap + 2))
            # Pairs that never occur read back as 0; windows whose end
            # symbols are outside the alphabet are simply never looked up.
            t.extend(ends[pair] for pair in pairs)
        #end-for
        T.append(np.array(t))
    # end-for
    T = np.array(T)

    if seqType in ('DNA', 'RNA'):
        totalFeature = 4 * args.gGap * 4
    elif seqType == 'PROT':
        totalFeature = 20 * args.gGap * 20
    else:
        totalFeature = 0
    #end-if

    save.datasetSave(T, totalFeature, 'fg11')


#end-def
Exemplo n.º 2
0
def generate(X, seqType, args):
    """
    Build k-mer composition counts and pass them to ``save.datasetSave``.

    # Reference-1: (http://rosalind.info/glossary/k-mer-composition/) # It is also called "k-mer composition".
    # Reference-2: iRecSpot-EF: https://www.sciencedirect.com/science/article/abs/pii/S0010482518302981

    Each sequence in ``X`` is first cut to its leading ``args.terminusLength``
    characters. For every ``k`` in ``1..args.kTuple`` and every word of
    length ``k`` over the alphabet, the number of positions at which that
    word occurs in the sequence is recorded. Occurrences may overlap
    ('AA' occurs twice in 'AAA'); ``str.count`` is deliberately not used
    because it only counts non-overlapping occurrences and would undercount
    repeats.

    :param X: iterable of sequences given as strings
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
        feature count of 0 to ``save.datasetSave``
    :param args: object exposing ``kTuple`` (largest word length) and
        ``terminusLength`` (prefix length kept from every sequence)
    :return: None; the feature matrix is handed to ``save.datasetSave``
        together with the feature count and the tag 'fkmer'
    """
    from collections import Counter

    elements = utils.sequenceElements(seqType)
    sizes = range(1, args.kTuple + 1)

    # The column order is the same for every sequence, so enumerate the
    # words once: shortest first, then in itertools.product order.
    vocabulary = [
        ''.join(letters)
        for k in sizes
        for letters in itertools.product(elements, repeat=k)
    ]

    T = []
    for x in X:
        x = x[:args.terminusLength]
        # Count every sliding window of every requested length in one pass.
        occurrences = Counter(
            x[start:start + k]
            for k in sizes
            for start in range(len(x) - k + 1)
        )
        # Words that never occur read back as 0.
        T.append(np.array([occurrences[word] for word in vocabulary]))
    #end-for

    T = np.array(T)

    if seqType in ('DNA', 'RNA'):
        totalFeature = np.sum([4**k for k in sizes])
    elif seqType == 'PROT':
        totalFeature = np.sum([20**k for k in sizes])
    else:
        totalFeature = 0
    #end-if

    save.datasetSave(T, totalFeature, 'fkmer')
Exemplo n.º 3
0
def generate(X, seqType, args):
    """
    Build position-wise one-hot gapped-pair ("g11") features and pass them
    to ``save.datasetSave``.

    Each sequence in ``X`` is first cut to its leading ``args.terminusLength``
    symbols. For every gap size ``g`` in ``1..args.gGap``, each window
    returned by ``utils.kmers(x, 2 + g)`` becomes one row that is 1 in the
    column of its (first symbol + last symbol) pair and 0 elsewhere. A window
    whose pair is not in the alphabet becomes an all-zero row, so every row
    keeps the same width. All-zero rows are appended until the block has
    ``args.terminusLength - 2`` rows, and the per-gap blocks are joined side
    by side (``axis=1``).

    :param X: iterable of sequences given as strings
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
        feature count of 0 to ``save.datasetSave``
    :param args: object exposing ``gGap`` (largest gap size) and
        ``terminusLength`` (prefix length kept from every sequence)
    :return: None; the feature tensor is handed to ``save.datasetSave``
        together with the feature count and the tag 'pg11'
    """
    elements = utils.sequenceElements(seqType)
    # Column layout of a one-hot row, built once instead of once per window.
    pairs = [''.join(pair) for pair in itertools.product(elements, repeat=2)]
    column = {pair: idx for idx, pair in enumerate(pairs)}
    width = len(pairs)

    # Rows produced by the shortest window (2 symbols + gap of 1) on a
    # sequence that is exactly terminusLength long; every block is padded
    # up to this height.
    full_rows = args.terminusLength - (2 + 1) + 1

    T = []
    for x in X:
        merged = []
        x = x[:args.terminusLength]
        for gap in range(1, args.gGap + 1):
            t = []
            # NOTE(review): utils.kmers presumably returns every sliding
            # window of the requested length - confirm in utils.
            for kmer in utils.kmers(x, 2 + gap):  # g11 --> 2, gGap (g11+gGap)
                row = [0] * width
                idx = column.get(kmer[0] + kmer[-1])
                if idx is not None:
                    row[idx] = 1
                t.append(row)
            # end-for
            # Number of rows still missing; range() is empty when the block
            # is already tall enough.
            require = full_rows - (len(x) - (2 + gap) + 1)
            t.extend([0] * width for _ in range(require))
            merged.append(np.array(t))
        # end-for
        T.append(np.concatenate(merged, axis=1))
    # end-for
    T = np.array(T)

    if seqType in ('DNA', 'RNA'):
        totalFeature = 4 * args.gGap * 4
    elif seqType == 'PROT':
        totalFeature = 20 * args.gGap * 20
    else:
        totalFeature = 0
    # end-if
    save.datasetSave(T, totalFeature, 'pg11')


# end-def
Exemplo n.º 4
0
def generate(X, seqType, args):
    """
    Build position-wise one-hot k-mer features and pass them to
    ``save.datasetSave``.

    Each sequence in ``X`` is first cut to its leading ``args.terminusLength``
    symbols. Each window returned by ``utils.kmers(x, args.kTuple)`` becomes
    one row that is 1 in the column of that k-mer and 0 elsewhere. A window
    that is not a word over the alphabet becomes an all-zero row, so every
    row keeps the same width. All-zero rows are appended to make up for
    sequences shorter than ``args.terminusLength``.

    :param X: iterable of sequences (each must support ``len`` and slicing)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
        feature count of 0 to ``save.datasetSave``
    :param args: object exposing ``kTuple`` (window length) and
        ``terminusLength`` (prefix length kept from every sequence)
    :return: None; the feature tensor is handed to ``save.datasetSave``
        together with the feature count and the tag 'pkmer'
    """
    k = args.kTuple
    terminusLength = args.terminusLength

    elements = utils.sequenceElements(seqType)
    # Column layout of a one-hot row, built once instead of once per window
    # (for proteins the vocabulary has 20**k entries).
    words = [''.join(word) for word in itertools.product(elements, repeat=k)]
    column = {word: idx for idx, word in enumerate(words)}
    width = len(words)

    T = []
    for x in X:
        x = x[:terminusLength]
        # Windows a full-length sequence would give minus the windows this
        # sequence gives, i.e. how many padding rows are needed.
        require = (terminusLength - k + 1) - (len(x) - k + 1)
        t = []
        # NOTE(review): utils.kmers presumably returns every sliding window
        # of length k as a string - confirm in utils.
        for kmer in utils.kmers(x, k):
            row = [0] * width
            idx = column.get(kmer)
            if idx is not None:
                row[idx] = 1
            t.append(row)
        #end-for
        # range() is empty when no padding is required.
        t.extend([0] * width for _ in range(require))
        T.append(np.array(t))
    #end-for
    T = np.array(T)

    if seqType in ('DNA', 'RNA'):
        totalFeature = 4**k
    elif seqType == 'PROT':
        totalFeature = 20**k
    else:
        totalFeature = 0
    # end-if

    save.datasetSave(T, totalFeature, 'pkmer')


#end-def