Exemplo n.º 1
0
def match():
    """Queue a background job that matches two comma-separated feature lists.

    Form fields read from ``request.form``: ``feature1``, ``feature2``,
    ``Species`` and ``inputEmail``.  Settings entries read from
    ``get_settings()``: ``Encode``, ``Roadmap``, ``GGR``,
    ``GSMGSE_pkl_path`` and ``Chipseq``.  The first keyword of each feature
    list doubles as that list's output prefix.

    Returns:
        The acknowledgement text for the browser; the actual work is handed
        to ``CallMatch.delay``.
    """
    form = request.form
    first_terms = form['feature1']
    second_terms = form['feature2']
    requested_species = form['Species']
    reply_to = form['inputEmail']

    config = get_settings()
    encode_path = config['Encode']
    roadmap_path = config['Roadmap']
    ggr_path = config['GGR']
    gsm_gse_path = config['GSMGSE_pkl_path']

    first_keywords = first_terms.split(",")
    second_keywords = second_terms.split(",")

    # NOTE(review): the fallback species literal looks masked in this copy of
    # the source; it is reproduced unchanged -- confirm the intended value.
    if requested_species == '':
        species = 'H**o sapiens'
    else:
        species = requested_species

    working_dir = config['Chipseq']

    # The argument list is positional; the meaning of the literal flags is
    # defined by CallMatch, which is not visible from this function.
    CallMatch.delay(
        first_keywords[0], second_keywords[0], '../tmp/',
        first_keywords, [], second_keywords, [],
        'chip-seq', 'chip-seq',
        True, True, False, None, False, None,
        species, True, True,
        encode_path, roadmap_path, ggr_path,
        gsm_gse_path, working_dir, 20, reply_to)
    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 2
0
def connectToGEO(user='******', ftpAddress='ftp.ncbi.nlm.nih.gov'):
    """Open an FTP session and log in with the configured e-mail as password.

    Args:
        user: login name handed to ``FTP.login``.
        ftpAddress: host to connect to.

    Returns:
        The logged-in ``FTP`` object; the caller is responsible for closing it.
    """
    # Read the settings first so a configuration problem surfaces before any
    # network connection is opened.
    password = get_settings()['email']

    connection = FTP(ftpAddress)
    connection.login(user, password)
    return connection
Exemplo n.º 3
0
def query():
    """Queue a background ID query from an uploaded ID list.

    Reads the uploaded file ``IDlist`` (one entry per line, surrounding
    whitespace removed) and the ``inputEmail`` form field, then hands the IDs
    to ``CallQuery.delay`` together with the ``GSMGSE_pkl_path`` and
    ``GSMtoSRRpkl`` settings.

    Returns:
        The acknowledgement text for the browser.
    """
    upload = request.files['IDlist']
    reply_to = request.form['inputEmail']

    id_list = [raw_line.strip() for raw_line in upload.readlines()]
    upload.close()

    config = get_settings()
    CallQuery.delay(id_list,
                    '../tmp/query.txt',
                    config['GSMGSE_pkl_path'],
                    config['GSMtoSRRpkl'],
                    reply_to)
    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 4
0
def search():
    """Queue a background keyword search from the submitted form.

    Form fields read: ``searchterms`` (comma-separated), ``Species`` and
    ``inputEmail``.  The first search term is used as the output prefix and
    the ``Chipseq`` settings entry is passed along as ``cwd``.

    Returns:
        The acknowledgement text for the browser.
    """
    form = request.form
    raw_terms = form['searchterms']
    requested_species = form['Species']
    reply_to = form['inputEmail']

    # NOTE(review): the fallback species literal looks masked in this copy of
    # the source; it is reproduced unchanged -- confirm the intended value.
    if requested_species == '':
        species = 'H**o sapiens'
    else:
        species = requested_species

    config = get_settings()
    keywords = raw_terms.split(",")
    working_dir = config['Chipseq']

    CallSearch.delay(keywords[0], './tmp/', keywords, species, working_dir,
                     reply_to)

    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 5
0
def search():
    """Queue a background keyword search restricted by sample annotations.

    Form fields read: ``searchterms``, ``Species``, ``inputEmail``,
    ``CellLines``, ``CellTypes``, ``Organs`` and ``Tissues``.  Every list
    field is comma-separated and each entry is trimmed.

    The output prefix is the first search term followed by the first
    non-empty entry of each annotation list (cell lines, cell types, organs,
    tissues, in that order), joined with ``'_'``.

    Returns:
        The acknowledgement text for the browser; the work itself is handed
        to ``CallSearch.delay``.
    """
    def split_terms(raw):
        # Comma-separated form value -> list of whitespace-trimmed entries.
        return [term.strip() for term in raw.split(",")]

    _searchterms = request.form['searchterms']
    _Species = request.form['Species']
    _inputEmail = request.form['inputEmail']
    # NOTE(review): the fallback species literal looks masked in this copy of
    # the source; it is reproduced unchanged -- confirm the intended value.
    species = _Species if _Species != '' else 'H**o sapiens'
    _CellLines = split_terms(request.form['CellLines'])
    _CellTypes = split_terms(request.form['CellTypes'])
    _Organs = split_terms(request.form['Organs'])
    _Tissues = split_terms(request.form['Tissues'])

    settings = get_settings()

    keywords = split_terms(_searchterms)

    # The original guard compared len(...) (an int) with [] and was therefore
    # always true; check for a present, non-empty first entry instead.
    prefix_parts = [keywords[0]]
    for annotation in (_CellLines, _CellTypes, _Organs, _Tissues):
        if annotation and annotation[0]:
            prefix_parts.append(annotation[0])
    output_prefix = '_'.join(prefix_parts)

    output_path = './tmp/'

    cwd = settings['Chipseq']

    CallSearch.delay(output_prefix,
                     output_path,
                     keywords,
                     species,
                     cwd,
                     _CellLines,
                     _CellTypes,
                     _Organs,
                     _Tissues,
                     _inputEmail=_inputEmail)

    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 6
0
def match():
    """Queue a background job that matches two comma-separated feature lists.

    Form fields read: ``feature1``, ``feature2``, ``Species`` and
    ``inputEmail``.  The first keyword of each feature list is used as that
    list's output prefix and the ``Chipseq`` settings entry is passed along
    as ``cwd``.

    Returns:
        The acknowledgement text for the browser.
    """
    form = request.form
    first_terms = form['feature1']
    second_terms = form['feature2']
    requested_species = form['Species']
    reply_to = form['inputEmail']

    config = get_settings()

    first_keywords = first_terms.split(",")
    second_keywords = second_terms.split(",")

    # NOTE(review): the fallback species literal looks masked in this copy of
    # the source; it is reproduced unchanged -- confirm the intended value.
    if requested_species == '':
        species = 'H**o sapiens'
    else:
        species = requested_species

    working_dir = config['Chipseq']

    CallMatch.delay(first_keywords[0], second_keywords[0], './tmp/',
                    first_keywords, second_keywords, species, working_dir,
                    reply_to)

    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 7
0
def search():
    """Queue a background keyword search using the configured pickle files.

    Form fields read: ``searchterms`` (comma-separated), ``Species`` and
    ``inputEmail``.  Settings entries read: ``Encode``, ``Roadmap``, ``GGR``,
    ``GSMGSE_pkl_path`` and ``Chipseq``.  The species value is forwarded
    exactly as submitted (no fallback is applied here).

    Returns:
        The acknowledgement text for the browser.
    """
    form = request.form
    raw_terms = form['searchterms']
    species = form['Species']
    reply_to = form['inputEmail']

    config = get_settings()
    encode_path = config['Encode']
    roadmap_path = config['Roadmap']
    ggr_path = config['GGR']
    gsm_gse_path = config['GSMGSE_pkl_path']

    keywords = raw_terms.split(",")
    working_dir = config['Chipseq']

    CallSearch.delay(
        keywords[0],    # output_prefix
        '../tmp/',      # output_path
        keywords,
        [],             # keywords_begin
        'chip-seq',     # type_seq
        True,           # ignorcase
        False,          # geo
        None,           # geo_file
        species,
        True,           # encode_remove
        True,           # roadmap_remove
        encode_path,
        roadmap_path,
        ggr_path,
        gsm_gse_path,
        working_dir,
        20,             # process
        reply_to)

    return 'We are processing your request, results will be sent to your email'
Exemplo n.º 8
0
def update():
    """Reload the five persisted maps, refresh them and save them back.

    The maps are loaded from the paths stored under the settings keys
    ``GSMGSE_pkl_path``, ``GSMtoSRRpkl``, ``Encode``, ``Roadmap`` and ``GGR``.
    ``updateGSMGSE_Encode_Roadmap`` refreshes four of them and reports which
    GSM entries changed; ``updateGSMSRR`` then refreshes the GSM->SRR map for
    those entries.  Each map is saved under its settings path with the last
    four characters removed (presumably the ``.pkl`` suffix, which
    ``save_obj`` would then add back -- confirm against ``save_obj``).
    """
    parameters = get_settings()

    load_order = ('GSMGSE_pkl_path', 'GSMtoSRRpkl', 'Encode', 'Roadmap', 'GGR')
    maps = {}
    for key in load_order:
        maps[key] = load_obj(parameters[key])

    metadata_dir = parameters["MetaData"]
    if metadata_dir == "None":
        # The settings store a missing path as the string "None".
        metadata_dir = None

    refreshed = updateGSMGSE_Encode_Roadmap(maps['GSMGSE_pkl_path'],
                                            maps['Encode'],
                                            maps['Roadmap'],
                                            maps['GGR'],
                                            metadata_dir)
    (maps['GSMGSE_pkl_path'], maps['Encode'], maps['Roadmap'], maps['GGR'],
     stale_gsm) = refreshed

    maps['GSMtoSRRpkl'] = updateGSMSRR(maps['GSMtoSRRpkl'], stale_gsm)

    save_order = ('GSMGSE_pkl_path', 'Encode', 'Roadmap', 'GGR', 'GSMtoSRRpkl')
    for key in save_order:
        save_obj(maps[key], parameters[key][:-4])
Exemplo n.º 9
0
def CIG_grid():
    '''
    Command-line entry point for the 'grid' sub-command.

    Builds an argparse parser for the grid arguments, prints usage/help when
    asked (or when too few arguments are given) and otherwise parses
    sys.argv.

    Returns 1 when too few arguments were given or parsing failed, and 0
    after printing the help text.

    NOTE(review): everything after parsing reads attributes that this parser
    never defines (feature_key_words, keywords_begin, type_seq, ignorecase,
    geo, geo_file, species, MetaData, process) and ends by calling
    SOFTQuickParser.  As written, a successful parse therefore raises
    AttributeError at the first such access.  The tail looks copied from the
    search entry point -- the intended grid logic needs to be supplied.
    '''
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help'
                                                           not in sys.argv):
        # Fewer than two command-line arguments and no help flag: print the
        # short usage text and report failure.
        print "\nusage:\n\npython danpos.py grid [optional arguments] <target_table1> <target_table2> <danpos_result_path> <features> <output_prefix> <output_path>\n\nfor more help, please try: python danpos.py grid -h\n"
        return 1

    # NOTE(review): the usage text lists six positionals, but nine are
    # declared after 'command' below (the three *_grid arguments are missing
    # from the usage line).
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage=
        "\n\npython danpos.py grid [optional arguments] <target_table1> <target_table2> <danpos_result_path> <features> <output_prefix> <output_path>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument('command',
                        default=None,
                        help="set as 'grid' to perform parameter optimization")

    parser.add_argument(
        'target_table1',
        default=None,
        help=
        "The first table of genes, containing the columns at least 'gene_id', 'sample_prefix', table delimiter is recognized by the file surfix (csv, tsv, txt, xls, xlsx)",
    )
    parser.add_argument(
        'target_table2',
        default=None,
        help=
        "The second table of genes, containing the columns at least 'gene_id', 'sample_prefix', table delimiter is recognized by the file surfix (csv, tsv, txt, xls, xlsx)"
    )
    parser.add_argument(
        'danpos_result_path',
        default=None,
        help=
        "folder containing the danpos peak calling result tables, make sure the tables startswith sample_prefix and with '_' as delimiter in the name"
    )
    parser.add_argument('features', default=None, help="")
    parser.add_argument('output_prefix',
                        default=None,
                        help="the prefix for output files")
    parser.add_argument('output_path', default=None, help="the output path")
    parser.add_argument(
        'up_stream_grid',
        default=None,
        help=
        "the optimization grid for upstream distance, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )
    parser.add_argument(
        'down_stream_grid',
        default=None,
        help=
        "the optimization grid for downstream distance, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )
    parser.add_argument(
        'height_grid',
        default=None,
        help=
        "the optimization grid for height, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )

    # Optional parameters.
    parser.add_argument('-f',
                        dest='function',
                        metavar='',
                        default='wilcoxon',
                        help="wilcoxon or fisher")

    args = None

    if '-h' in sys.argv or '--help' in sys.argv:  # help explicitly requested by the user
        # "\C" is not a recognised escape sequence, so the backslash is
        # printed literally.
        print "\Chipseqpair\n"
        parser.print_help()
        print "\n"
        return 0
    elif len(sys.argv) >= 3:
        try:
            args = parser.parse_args()
        except:
            # argparse reports bad arguments by raising SystemExit; the bare
            # except turns that (and any other exception) into a hint plus
            # return code 1.
            # NOTE(review): the hint names 'CSP.py search', not
            # 'danpos.py grid'.
            print "\nfor more help, please try: python CSP.py search -h\n"
            return 1

    if args is not None:
        settings = get_settings()
        encode_pkl = settings['Encode']
        roadmap_pkl = settings['Roadmap']
        GGRmap_pkl = settings['GGR']
        GSMGSE_pkl = settings['GSMGSE_pkl_path']

        # NOTE(review): 'feature_key_words' is not an argument of the parser
        # built above (the positional is called 'features'), so this line
        # raises AttributeError.
        keywords = args.feature_key_words.split(",")

        output_prefix = args.output_prefix
        if output_prefix is None:
            output_prefix = keywords[0]

        output_path = args.output_path
        if output_path is None:
            output_path = './search_output/'
            if not os.path.isdir(output_path):
                os.system("mkdir search_output")

        # NOTE(review): keywords_begin, type_seq, ignorecase, geo, geo_file,
        # species, MetaData and process are likewise not defined by this
        # parser.
        if args.keywords_begin == '':
            keywords_begin = []
        else:
            keywords_begin = args.keywords_begin.split(",")

        type_seq = args.type_seq
        ignorcase = args.ignorecase
        geo = args.geo
        geo_file = args.geo_file

        species = args.species
        encode_remove = 1
        roadmap_remove = 1

        cwd = args.MetaData
        process = args.process

        # Fall back to the configured metadata location.
        if cwd is None:
            cwd = settings['MetaData']

        # The settings may store a missing path as the string "None".
        if cwd is None or cwd == "None":
            cwd = None
            encode_remove = True
            roadmap_remove = True

        SOFTQuickParser(output_prefix,
                        output_path,
                        keywords,
                        keywords_begin,
                        type_seq=type_seq,
                        ignorecase=ignorcase,
                        geo=geo,
                        geofile=geo_file,
                        output_type=species,
                        encode_remove=encode_remove,
                        roadmap_remove=roadmap_remove,
                        encode_pkl=encode_pkl,
                        roadmap_pkl=roadmap_pkl,
                        GGRmap_pkl=GGRmap_pkl,
                        GSMGSE_pkl=GSMGSE_pkl,
                        cwd=cwd,
                        process=process)
        return
    return 1
Exemplo n.º 10
0
def _clean_id_tokens(chunks):
    '''
    split every text chunk on ',' and return the trimmed, non-empty tokens in
    the order they were encountered
    '''
    tokens = []
    for chunk in chunks:
        for token in chunk.split(","):
            token = token.strip()
            if token:
                tokens.append(token)
    return tokens


def GCF_query():
    '''
    Command-line entry point for the 'query' sub-command.

    Expects sys.argv to look like: CSP.py query [-o OUTPUT] <IDs>, where
    <IDs> is either a ','-separated list of IDs or the path of a file holding
    such IDs (several lines allowed).  IDs read from a file are de-duplicated
    keeping their first-seen order; blank entries are ignored in both cases.

    The output file defaults to 'query.txt' in the current directory.  The
    pickle locations 'GSMGSE_pkl_path' and 'GSMtoSRRpkl' are taken from
    get_settings() and everything is handed to GEO_query.

    Returns 1 when too few arguments were given or they could not be parsed,
    0 after printing the help text, and None after a completed query.
    '''
    wants_help = '-h' in sys.argv or '--help' in sys.argv

    if len(sys.argv) < 3 and not wants_help:
        # Nothing to query and no help requested: show the short usage text.
        print("\nusage:\n\npython CSP.py query [optional arguments] <ID list> <outputpath>\n\nfor more help, please try: python CSP.py query -h\n")
        return 1

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage=
        "\n\npython CSP.py query [optional arguments] <ID list> <outputpath>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument(
        'command',
        default=None,
        help="set as 'query' to looking for samples' NGS sequencing information"
    )

    parser.add_argument(
        'IDs',
        default=None,
        help=
        "list of IDs need to used to looking for the NGS sequencing information, "
        "it could be a list of IDs separated by ',', or a file containing a list of IDs,"
        " accepted IDs: GSM, GSE, SRR, SRP, SRX, SAMN, SRP, ",
    )

    parser.add_argument('-o',
                        '--output_path',
                        dest='output_path',
                        default=None,
                        metavar='',
                        help="specify the output file name and path.")

    if wants_help:
        # The banner has always been printed with a literal leading
        # backslash; the escape is spelled out so the output stays the same.
        print("\\Chipseqpair\n")
        parser.print_help()
        print("\n")
        return 0

    # From here on len(sys.argv) >= 3 is guaranteed by the checks above.
    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse has already printed its own error; add the help hint and
        # report failure instead of terminating the interpreter.
        print("\nfor more help, please try: python CSP.py query -h\n")
        return 1

    output_path = args.output_path
    if output_path is None:
        output_path = os.getcwd() + "/query.txt"

    GEO_ids = args.IDs
    if os.path.isfile(GEO_ids):
        # The argument names a file: collect the IDs from all of its lines.
        with open(GEO_ids, "r") as id_file:
            tokens = _clean_id_tokens(id_file)
        seen = set()
        id_list = []
        for token in tokens:
            if token not in seen:
                seen.add(token)
                id_list.append(token)
    else:
        id_list = _clean_id_tokens([GEO_ids])

    settings = get_settings()
    GSMGSE_pkl = settings['GSMGSE_pkl_path']
    GSM_SRR_pkl = settings['GSMtoSRRpkl']

    GEO_query(id_list, output_path, GSMGSE_pkl, GSM_SRR_pkl)
    return None
Exemplo n.º 11
0
def GCF_search():
    '''
    Command-line entry point for the 'search' sub-command.

    Expects sys.argv to look like:
    CSP.py search [optional arguments] <feature key words>.
    The key words are a ','-separated list.  When no output prefix is given
    the first key word is used; when no output path is given
    './search_output/' is used and created if it does not exist.  The
    metadata location falls back to the 'MetaData' settings entry.

    Pickle locations ('Encode', 'Roadmap', 'GGR', 'GSMGSE_pkl_path') come
    from get_settings() and everything is handed to SOFTQuickParser.

    Returns 1 when too few arguments were given or they could not be parsed,
    0 after printing the help text, and None after a completed search.
    '''
    wants_help = '-h' in sys.argv or '--help' in sys.argv

    if len(sys.argv) < 3 and not wants_help:
        # Nothing to search for and no help requested: show the usage text.
        print("\nusage:\n\npython CSP.py search [optional arguments] <features> <output_prefix> <output_path>\n\nfor more help, please try: python CSP.py search -h\n")
        return 1

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage=
        "\n\npython CSP.py search [optional arguments] <features> <output_prefix> <output_path>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument(
        'command',
        default=None,
        help="set as 'search' to looking for samples with key words")

    parser.add_argument(
        'feature_key_words',
        default=None,
        help=
        "list of feature key words need to used to looking for the NGS sequencing samples, "
        "different key words need to be separated by ','")

    # Optional parameters.
    parser.add_argument(
        '-b',
        dest='keywords_begin',
        metavar='',
        default='',
        help=
        "list of key words in features need to be used to occur in the beginning of a word, "
        "different key words need to be separated by ','")
    parser.add_argument(
        '-t',
        '--type',
        dest='type_seq',
        metavar='',
        default='chip-seq',
        help="type of sequencing specified in the search. Default is 'chip-seq"
    )
    parser.add_argument(
        '-c',
        '--ignorecase',
        dest='ignorecase',
        metavar='',
        default=1,
        type=int,
        help=
        "specify whether case (A vs a) need to be ignored in the search. Default is 1 which means case will be ignored in the search. Set to 0 if don't want to ignore the case. "
    )
    parser.add_argument(
        '--geo',
        dest='geo',
        default=0,
        type=int,
        metavar='',
        help=
        "specify whether search will be limited to GEO website search result. Default is 0. Set to 1 if only want to correct GEO search results."
    )
    parser.add_argument(
        '--candidateslist',
        dest='geo_file',
        default=None,
        metavar='',
        help=
        "specify the file path of GSM ID list if '--hasCandidates' set to 1")
    # NOTE(review): the species default and help text look masked in this
    # copy of the source; they are reproduced unchanged -- confirm the
    # intended value.
    parser.add_argument(
        '-s',
        '--species',
        dest='species',
        default='H**o sapiens',
        metavar='',
        help=
        "specify the samples' species. Default is H**o sapiens. Please use the species official name. For example, human is H**o sapiens."
        "If the species name contains space, surround the name with double quotes, for example \"H**o sapiens\""
    )
    parser.add_argument('-m',
                        '--metadata',
                        dest='MetaData',
                        default=None,
                        metavar='',
                        help="specify the GSMs metadata files path")
    parser.add_argument(
        '-p',
        '--process',
        dest='process',
        default=20,
        type=int,
        metavar='',
        help="specify the number of parallel search processes want to use.")
    parser.add_argument('-f',
                        '--output_prefix',
                        dest='output_prefix',
                        default=None,
                        metavar='',
                        help="specify the output file prefix.")
    parser.add_argument('-o',
                        '--output_path',
                        dest='output_path',
                        default=None,
                        metavar='',
                        help="specify the output file location.")

    if wants_help:
        # The banner has always been printed with a literal leading
        # backslash; the escape is spelled out so the output stays the same.
        print("\\Chipseqpair\n")
        parser.print_help()
        print("\n")
        return 0

    # From here on len(sys.argv) >= 3 is guaranteed by the checks above.
    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse has already printed its own error; add the help hint and
        # report failure instead of terminating the interpreter.
        print("\nfor more help, please try: python CSP.py search -h\n")
        return 1

    settings = get_settings()
    encode_pkl = settings['Encode']
    roadmap_pkl = settings['Roadmap']
    GGRmap_pkl = settings['GGR']
    GSMGSE_pkl = settings['GSMGSE_pkl_path']

    keywords = args.feature_key_words.split(",")

    output_prefix = args.output_prefix
    if output_prefix is None:
        output_prefix = keywords[0]

    output_path = args.output_path
    if output_path is None:
        output_path = './search_output/'
        if not os.path.isdir(output_path):
            # Create the default folder directly instead of shelling out, so
            # a failure is reported here rather than silently ignored.
            os.makedirs(output_path)

    if args.keywords_begin == '':
        keywords_begin = []
    else:
        keywords_begin = args.keywords_begin.split(",")

    encode_remove = 1
    roadmap_remove = 1

    cwd = args.MetaData
    if cwd is None:
        # Fall back to the configured metadata location.
        cwd = settings['MetaData']

    if cwd is None or cwd == "None":
        # The settings may store a missing path as the string "None".
        cwd = None
        encode_remove = True
        roadmap_remove = True

    SOFTQuickParser(output_prefix,
                    output_path,
                    keywords,
                    keywords_begin,
                    type_seq=args.type_seq,
                    ignorecase=args.ignorecase,
                    geo=args.geo,
                    geofile=args.geo_file,
                    output_type=args.species,
                    encode_remove=encode_remove,
                    roadmap_remove=roadmap_remove,
                    encode_pkl=encode_pkl,
                    roadmap_pkl=roadmap_pkl,
                    GGRmap_pkl=GGRmap_pkl,
                    GSMGSE_pkl=GSMGSE_pkl,
                    cwd=cwd,
                    process=args.process)
    return None
Exemplo n.º 12
0
        for gse in sampleSeriesID:
            if gse in excludeGSEs:
                exclude = True

        if not exclude and sampleLibraryStrategy.lower() == 'chip-seq':
            results.append(sample)

            for gse_id in sample.series:
                GSE_GSM[gse_id].add(sample.id)

    queue.put((results, GSE_GSM))
    if db is not None:
        db.close()


# Module-level setup: load the three objects stored at the 'Encode',
# 'Roadmap' and 'GGR' settings paths and merge their members into one set.
# Presumably these hold GSE identifiers to exclude from results -- confirm
# against the code that consumes excludedGSE.
# NOTE: this runs (and reads the pickles) every time the module is imported.
settings = get_settings()
encode_pkl, roadmap_pkl, GGRmap_pkl = (
    settings['Encode'],
    settings['Roadmap'],
    settings['GGR'],
)

encodeGSE = load_obj(encode_pkl)
roadmapGSE = load_obj(roadmap_pkl)
GGRmapGSE = load_obj(GGRmap_pkl)

excludedGSE = set().union(encodeGSE, roadmapGSE, GGRmapGSE)