def webex_message():
    raw_json = request.get_json()
    data = raw_json['data']
    # Check that this is not a message by the bot itself
    if not webex_api.people.me().id == data['personId']:
        message = webex_api.messages.get(data['id'])
        text = message.text
        # Get raw command string - Webex Teams bots are mentioned in the beginning
        command_string = " ".join(str(text).lower().split(" ")[1:])
        # Set target to our source room id
        webex_broker.set_target_id(data['roomId'])
        # Parse the command string and trigger a return
        cmd, args = parse(command_string)
        cf.run_command(cmd, args)
    return jsonify({'success': True})
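# A minimal sketch of how the handler above could be wired into a Flask app,
# assuming the webexteamssdk client; the route path, port, and the way
# webex_api is constructed are illustrative assumptions, not taken from the
# original code.
from flask import Flask, request, jsonify
from webexteamssdk import WebexTeamsAPI

app = Flask(__name__)
webex_api = WebexTeamsAPI()  # reads WEBEX_TEAMS_ACCESS_TOKEN from the environment

# register the existing webex_message handler for incoming webhook POSTs
app.add_url_rule('/webex_message', 'webex_message', webex_message, methods=['POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)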
def upload():
    if 'file' not in request.files:
        return make_response('No file sent', 400)
    req_file = request.files['file']
    if req_file.filename == '':
        return make_response('No file selected', 400)
    if req_file:
        filename = secure_filename(req_file.filename)
        parsed_file = parse(req_file, 500)
        cols = []
        for k, v in parsed_file.items():
            cols.append(Column(name=k, data=v))
        new_data = Data(created=datetime.datetime.now(), columns=cols)
        req_file.stream.seek(0)
        new_data.file.put(req_file,
                          content_type='text/csv',
                          filename=req_file.filename,
                          encoding='utf-8')
        new_data.save()
        result = {
            'id': str(new_data.id),
            'name': 'Raw data',
            'data': lists_to_csv([{
                'name': column.name,
                'data': column.data
            } for column in new_data.columns]),
            'type': constants.LINE_CHART
        }
        return jsonify(result), 200
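# A minimal client-side sketch for exercising the upload endpoint above,
# assuming the route is served at /upload on localhost:5000; the URL and the
# sample file name are illustrative assumptions.
import requests

def upload_csv(path='data.csv', url='http://localhost:5000/upload'):
    # send the file under the 'file' form field expected by upload()
    with open(path, 'rb') as fh:
        response = requests.post(url, files={'file': fh})
    response.raise_for_status()
    return response.json()  # contains id, name, data (CSV string) and chart type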
def main(): """ starts running the script :return: None. """ # get the the path for the input file argument parser = argparse.ArgumentParser() parser.add_argument('-r', '--retrieve', help='arg use to pull data from PubMed', action='store_true') parser.add_argument( '-p', '--process', help='arg use to process the info into paper, author, medical and ' 'title_abstracts records', action='store_true') parser.add_argument('-a', '--analyze', help='run topic modeling on the file', action='store_true') parser.add_argument( '-f', '--file', help= 'file to process. Depending on whether the retrieve, process or analysis ' 'options were selected, there is a different default file') parser.add_argument('-l', '--log', dest='logLevel', choices=['DEBUG', 'INFO', 'ERROR'], type=str.upper, help='Set the logging level') if sys.platform == "darwin" or sys.platform == "win32": if sys.platform == "win32": path = 'D:\dataset\scosy\dataset' else: path = '/Volumes/dataset/scosy/dataset' # Respublica else: path = 'dataset/' args = parser.parse_args() logger_initialization(log_level=args.logLevel) logging.getLogger('line.regular.time.line').info('Running SCOSY') if args.retrieve: logging.getLogger('regular').info('retrieving data from PudMed') # databases such as PubMed, GenBank, GEO, and many others # Use the mandatory email parameter so the NCBI can contact you if there is a proble Entrez.email = "*****@*****.**" # Always tell NCBI who you are logging.getLogger('regular').info( 'searching PubMed for CHOP and UPENN authors') handle = Entrez.esearch( db="pubmed", retmax=100000000, idtype="esearch", mindate="2014/01/01", maxdate="2020/08/21", term= "Perelman School of Medicine[Affiliation] OR Children's Hospital of " "Philadelphia[Affiliation] OR University of Pennsylvania School of " "Medicine[Affiliation] OR School of Medicine University of " "Pennsylvania[Affiliation]", usehistory="y") search_results = Entrez.read(handle) handle.close() # obtaining the list of relevant PMIDs id_list = search_results["IdList"] # get all the record based on the PMIDs # logging.getLogger('regular.time').info('getting relevant authors\' records based on PMIDs') fetch_records_handle = Entrez.efetch(db="pubmed", id=id_list, retmode="text", rettype="medline") # need to read all the data from the handle and store in a file because if we just read line by line from the # generator and the internet connection is not strong, then we run into http errors: # http.client.IncompleteRead: IncompleteRead(0 bytes read) result_path = Path(path, 'results.txt') out_handle = result_path.open('w+') out_handle.write(fetch_records_handle.read()) # the results are now in the results.xml file and the original handle has had all of its data extracted # (so we close it) out_handle.close() msg = 'saved authors\' records on local file = {0}'.format(result_path) logging.getLogger('regular.time').info(msg) elif args.process: # import data from file logging.getLogger('regular').info('reading data from result file') file_name = args.file if not file_name: file_name = 'results.txt' result_path = Path(path, file_name) records_handle = result_path.open() fetch_records = parse(handle=records_handle) # initializing variables mesh_description_dict = obtain_descriptions() # contains all the metadata elements on the author level: PubMed unique Identifier number(PMID), AuthorID (as a # (CA) Ordinary Author (OA) or Principal Author (PA) and the author's affiliation author_record_df = pd.DataFrame(columns=[ 'PMID', 'Author', 'author_chop', 'author_penn', 'Role', 
'AffiliationInfo' ]) # contains all the metadata elements on the paper level: PubMed unique Identifier number(PMID), Title, Abstract, # Year, Month, AuthorList, SubjectList, date paper_record_df = pd.DataFrame(columns=[ 'PMID', 'Title', 'Abstract', 'Year', 'Month', 'author_list', 'subject_list', 'date' ]) # contains all the metadata of the medical information: PubMed unique Identifier number(PMID), Primary Medical # Subject Header (MESH) and the description ID medical_record_df = pd.DataFrame( columns=['PMID', 'Desc', 'Primary_MeSH']) title_list = list() abstract_list = list() # get the relevant information for each record for record_index, record in enumerate(fetch_records): logging.getLogger('regular').debug( 'record index = {0}'.format(record_index)) try: pmid = record.get('PMID') title = record.get('TI') abstract = record.get('AB') authors = record.get('FAU') affiliations = record.get('AD') publication_type = record.get('PT') mesh_term = record.get('MH') date_created = record.get('EDAT') year, month = date_created.split('/')[:2] date = year + '/' + month logging.getLogger('regular').debug('pmid = {0}'.format(pmid)) logging.getLogger('regular').debug('title = {0}'.format(title)) logging.getLogger('regular').debug( 'abstract = {0}'.format(abstract)) logging.getLogger('regular').debug( 'authors = {0}'.format(authors)) logging.getLogger('regular').debug( 'affiliations = {0}'.format(affiliations)) logging.getLogger('regular').debug( 'publication type = {0}'.format(publication_type)) logging.getLogger('regular').debug( 'mesh term = {0}'.format(mesh_term)) logging.getLogger('regular').debug( 'data created = {0}'.format(date_created)) # assign the chief author, ordinary author or principal investigator role to each author roles = assign_roles(authors) # check and assign whether the authors belong to the CHOP or PENN organization chop_organization, penn_organization = assign_organization( affiliations) mesh_description = '' if mesh_term is None: mesh_term = '' else: mesh_description, term = convert_mesh_description( mesh_description_dict, mesh_term) mesh_term = ';'.join(mesh_term) # output information if mesh_description: row = pd.DataFrame( [[pmid, term, mesh_description]], columns=['PMID', 'Primary_MeSH', 'Desc']) medical_record_df = medical_record_df.append( row, ignore_index=True) for author_index, organizations in enumerate( zip(chop_organization, penn_organization)): # check if the author belongs to either CHOP or PENN if 1 in organizations: row = pd.DataFrame([[ pmid, authors[author_index], organizations[0], organizations[1], roles[author_index], affiliations[author_index] ]], columns=[ 'PMID', 'Author', 'author_chop', 'author_penn', 'Role', 'AffiliationInfo' ]) author_record_df = author_record_df.append( row, ignore_index=True) authors = ';'.join(authors) row = pd.DataFrame([[ pmid, title, abstract, year, month, authors, mesh_term, date ]], columns=[ 'PMID', 'Title', 'Abstract', 'Year', 'Month', 'author_list', 'subject_list', 'date' ]) paper_record_df = paper_record_df.append(row) title_list.append(title) abstract_list.append(abstract) except Exception as e: msg = 'Error while processing PMID={0}'.format(pmid) logging.getLogger('regular').debug(msg) msg = 'Exception message = {0}'.format(e) logging.getLogger('regular').debug(msg) # contains all the metadata elements on the author level: Pubmed unique Identifier number(PMID), AuthorID (as a # (CA) Ordinary Author (OA) or Principal Author (PA) and the author's affiliation author_path = Path(path, 'author_record.csv') 
author_record_df.to_csv(author_path, index=False) # contains all the metadata elements on the paper level: Pubmed unique Identifier number(PMID), Title, Abstract, # Year, Month, AuthorList, SubjectList, date paper_path = Path(path, 'paper_record.csv') paper_record_df.to_csv(paper_path, index=False) # contains all the metadata of the medical information: Pubmed unique Identifier number(PMID), Primary Medical # Subject Header (MESH) and the description ID medical_path = Path(path, 'medical_record.csv') medical_record_df.to_csv(medical_path, index=False) # store the record in a file for processing dataset = dict() dataset['title'] = title_list dataset['abstracts'] = abstract_list dataset['mesh'] = mesh_term dataset = pd.DataFrame(dataset) titles_abstracts_mesh_path = Path(path, 'titles_abstracts_mesh.csv') dataset.to_csv(path_or_buf=titles_abstracts_mesh_path, index=False) logging.getLogger('line.regular.time.line').info( 'SCOSY finished running successfully.')
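# A quick, optional sanity check on the three CSV files written by the --process
# branch above; a sketch only, where `path` is whatever dataset directory the
# platform check in main() resolved to.
import pandas as pd
from pathlib import Path

def preview_records(path='dataset/'):
    for name in ('author_record.csv', 'paper_record.csv', 'medical_record.csv'):
        df = pd.read_csv(Path(path, name))
        print(name, df.shape)  # number of rows and columns written
        print(df.head(3))      # first few records for a visual check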
def main():
    # get the path for the input file argument
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log", dest="logLevel", choices=['DEBUG', 'INFO', 'ERROR'], type=str.upper,
                        help="Set the logging level")
    args = parser.parse_args()

    logger_initialization(log_level=args.logLevel)
    logging.getLogger('line.regular.time.line').info('Running Recommendation System script')

    # import data from file
    logging.getLogger('regular').info('reading data from file')

    # Entrez (http://www.ncbi.nlm.nih.gov/Entrez) is a data retrieval system that provides users access to NCBI's
    # databases such as PubMed, GenBank, GEO, and many others
    # Use the mandatory email parameter so the NCBI can contact you if there is a problem
    Entrez.email = "*****@*****.**"  # Always tell NCBI who you are

    logging.getLogger('regular').info('searching pubmed for the CHOP and UPENN authors')
    handle = Entrez.esearch(db="pubmed", retmax=50000, idtype="esearch", mindate="2014/01/01", maxdate="2017/05/01",
                            term="Perelman School of Medicine[Affiliation] OR Children's Hospital of "
                                 "Philadelphia[Affiliation] OR University of Pennsylvania School of "
                                 "Medicine[Affiliation] OR School of Medicine University of "
                                 "Pennsylvania[Affiliation]",
                            usehistory="y")
    search_results = Entrez.read(handle)
    handle.close()

    # obtaining the list of relevant PMIDs
    id_list = search_results["IdList"]

    # get all the records based on the PMIDs
    logging.getLogger('regular').info('getting relevant authors\' records based on PMIDs')
    fetch_records_handle = Entrez.efetch(db="pubmed", id=id_list, retmode="text", rettype="medline")
    # need to read all the data from the handle and store it in a file because if we just read line by line from the
    # generator and the internet connection is not strong, then we run into http errors:
    # http.client.IncompleteRead: IncompleteRead(0 bytes read)
    logging.getLogger('regular').info('storing authors\' records on local file')
    with open("results.xml", "w") as out_handle:
        out_handle.write(fetch_records_handle.read())
    # the results are now in the results.xml file and the original handle has had all of its data extracted
    # (so we close it)
    fetch_records_handle.close()

    logging.getLogger('regular').info('reading result files')
    records_handle = open("results.xml")
    fetch_records = parse(handle=records_handle)

    # initializing variables
    mesh_description_dict = obtain_descriptions()

    # contains all the metadata elements on the author level: PubMed unique Identifier number (PMID), AuthorID (as a
    # combination of the author's last name, first name, and initials), institution, Role: Chief Author (CA),
    # Ordinary Author (OA) or Principal Author (PA) and the author's affiliation
    author_record_df = pd.DataFrame(columns=['PMID', 'AuthorID', 'Author CHOP', 'Author PENN', 'ROLE', 'Affiliation'])
    # contains all the metadata elements on the paper level: PubMed unique Identifier number (PMID), Title, Abstract,
    # Year, Month, AuthorList, SubjectList, date
    paper_record_df = pd.DataFrame(columns=['PMID', 'Title', 'Abstract', 'Year', 'Month', 'Author List',
                                            'Subject List', 'Date'])
    # contains all the metadata of the medical information: PubMed unique Identifier number (PMID), Primary Medical
    # Subject Header (MeSH) and the description ID
    medical_record_df = pd.DataFrame(columns=['PMID', 'MESH', 'Description'])

    title_list = list()
    abstract_list = list()

    # get the relevant information for each record
    for record_index, record in enumerate(fetch_records):
        logging.getLogger('regular').debug('record index = {0}'.format(record_index))
        try:
            pmid = record.get('PMID')
            title = record.get('TI')
            abstract = record.get('AB')
            authors = record.get('FAU')
            affiliations = record.get('AD')
            publication_type = record.get('PT')
            mesh_term = record.get('MH')
            date_created = record.get('EDAT')
            year, month = date_created.split('/')[:2]
            date = year + '/' + month

            logging.getLogger('regular').debug('pmid = {0}'.format(pmid))
            logging.getLogger('regular').debug('title = {0}'.format(title))
            logging.getLogger('regular').debug('abstract = {0}'.format(abstract))
            logging.getLogger('regular').debug('authors = {0}'.format(authors))
            logging.getLogger('regular').debug('affiliations = {0}'.format(affiliations))
            logging.getLogger('regular').debug('publication type = {0}'.format(publication_type))
            logging.getLogger('regular').debug('mesh term = {0}'.format(mesh_term))
            logging.getLogger('regular').debug('date created = {0}'.format(date_created))

            # assign the chief author, ordinary author or principal investigator role to each author
            roles = assign_roles(authors)
            # check and assign whether the authors belong to the CHOP or PENN organization
            chop_organization, penn_organization = assign_organization(affiliations)

            mesh_description = ''
            if mesh_term is None:
                mesh_term = ''
            else:
                term, mesh_description = convert_mesh_description(mesh_description_dict, mesh_term)
                mesh_term = ';'.join(mesh_term)

            # output information
            if mesh_description:
                row = pd.DataFrame([[pmid, term, mesh_description]],
                                   columns=['PMID', 'MESH', 'Description'])
                medical_record_df = medical_record_df.append(row, ignore_index=True)

            for author_index, organizations in enumerate(zip(chop_organization, penn_organization)):
                if 1 in organizations:
                    row = pd.DataFrame([[pmid, authors[author_index], organizations[0], organizations[1],
                                         roles[author_index], affiliations[author_index]]],
                                       columns=['PMID', 'AuthorID', 'Author CHOP', 'Author PENN', 'ROLE',
                                                'Affiliation'])
                    author_record_df = author_record_df.append(row, ignore_index=True)

            authors = ';'.join(authors)
            row = pd.DataFrame([[pmid, title, abstract, year, month, authors, mesh_term, date]],
                               columns=['PMID', 'Title', 'Abstract', 'Year', 'Month', 'Author List', 'Subject List',
                                        'Date'])
            paper_record_df = paper_record_df.append(row)

            title_list.append(title)
            abstract_list.append(abstract)

        except Exception as e:
            msg = 'Error while processing PMID={0}'.format(pmid)
            logging.getLogger('regular').debug(msg)
            msg = 'Exception message = {0}'.format(e)
            logging.getLogger('regular').debug(msg)

    # store the record in a file for processing
    dataset = dict()
    dataset['title'] = title_list
    dataset['abstracts'] = abstract_list
    dataset = pd.DataFrame(dataset)
    dataset.to_csv(path_or_buf='record_results/titles_abstracts.csv', index=False)

    # read the records from the file
    # dataset = pd.read_csv('record_results/titles_abstracts.csv')
    # topic_modeling(dataset=dataset)

    pandas.io.formats.excel.header_style = None

    # contains all the metadata elements on the author level: PubMed unique Identifier number (PMID), AuthorID,
    # role (CA, OA or PA) and the author's affiliation
    author_record_df.to_excel('record_results/author_record.xlsx', sheet_name='author_record', index=False)
    # contains all the metadata elements on the paper level: PubMed unique Identifier number (PMID), Title, Abstract,
    # Year, Month, AuthorList, SubjectList, date
    paper_record_df.to_excel('record_results/paper_record.xlsx', sheet_name='paper_record', index=False)
    # contains all the metadata of the medical information: PubMed unique Identifier number (PMID), Primary Medical
    # Subject Header (MeSH) and the description ID
    medical_record_df.to_excel('record_results/medical_record.xlsx', sheet_name='medical_record', index=False)

    logging.getLogger('line.regular.time.line').info('Recommendation System script finished running successfully.')
    p=< 2,0,0>, v=<-2,0,0>, a=<-2,0,0>                      (1)   (0)

    p=< 4,0,0>, v=< 0,0,0>, a=<-1,0,0>    -4 -3 -2 -1  0  1  2  3  4
    p=<-2,0,0>, v=<-4,0,0>, a=<-2,0,0>          (1)               (0)

    p=< 3,0,0>, v=<-1,0,0>, a=<-1,0,0>    -4 -3 -2 -1  0  1  2  3  4
    p=<-8,0,0>, v=<-6,0,0>, a=<-2,0,0>                         (0)

    At this point, particle 1 will never be closer to <0,0,0> than particle 0,
    and so, in the long run, particle 0 will stay closest.

    Which particle will stay closest to position <0,0,0> in the long term?
"""
from utils.parse import parse

p = parse()
tick = 0
while True:
    if tick > 1000:
        # after enough ticks, report the particle that stays nearest the origin
        print(min(p, key=lambda x: x['d'])['id'])
        break
    for i in range(0, len(p)):
        # update velocity by acceleration, then position by velocity
        p[i]['v'][0] += p[i]['a'][0]
        p[i]['v'][1] += p[i]['a'][1]
        p[i]['v'][2] += p[i]['a'][2]
        p[i]['p'][0] += p[i]['v'][0]
        p[i]['p'][1] += p[i]['v'][1]
        p[i]['p'][2] += p[i]['v'][2]
        # Manhattan distance from the origin
        p[i]['d'] = abs(p[i]['p'][0]) + abs(p[i]['p'][1]) + abs(p[i]['p'][2])
    # advance the simulation one tick
    tick += 1
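# A minimal sketch of what utils.parse.parse could look like for this puzzle,
# assuming the input lives in a local "input.txt"; the path and the integer
# extraction are illustrative assumptions -- only the returned dict keys
# (id, p, v, a, d) are taken from the loop above.
import re

def parse(path='input.txt'):
    particles = []
    with open(path) as fh:
        for i, line in enumerate(fh):
            nums = [int(n) for n in re.findall(r'-?\d+', line)]
            if len(nums) != 9:
                continue  # skip blank or malformed lines
            pos, vel, acc = nums[0:3], nums[3:6], nums[6:9]
            particles.append({
                'id': i,
                'p': pos,
                'v': vel,
                'a': acc,
                # Manhattan distance from the origin, used by the loop above
                'd': abs(pos[0]) + abs(pos[1]) + abs(pos[2]),
            })
    return particles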
def main():
    # get the path for the input file argument
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log", dest="logLevel", choices=['DEBUG', 'INFO', 'ERROR'], type=str.upper,
                        help="Set the logging level")
    args = parser.parse_args()

    logger_initialization(log_level=args.logLevel)
    logging.getLogger('line.regular.time.line').info('Running Recommendation System script')

    # import data from file
    logging.getLogger('regular').info('reading data from file')

    # Entrez (http://www.ncbi.nlm.nih.gov/Entrez) is a data retrieval system that provides users access to NCBI's
    # databases such as PubMed, GenBank, GEO, and many others
    # Use the mandatory email parameter so the NCBI can contact you if there is a problem
    Entrez.email = "*****@*****.**"  # Always tell NCBI who you are

    # logging.getLogger('regular').info('searching pubmed for the CHOP and UPENN authors')
    # handle = Entrez.esearch(db="pubmed", retmax=50000, idtype="esearch", mindate="2014/01/01", maxdate="2017/05/01",
    #                         term="Perelman School of Medicine[Affiliation] OR Children's Hospital of "
    #                              "Philadelphia[Affiliation] OR University of Pennsylvania School of "
    #                              "Medicine[Affiliation] OR School of Medicine University of "
    #                              "Pennsylvania[Affiliation]",
    #                         usehistory="y")
    # search_results = Entrez.read(handle)
    # handle.close()
    # # obtaining the list of relevant PMIDs
    # id_list = search_results["IdList"]
    #
    # # get all the records based on the PMIDs
    # logging.getLogger('regular').info('getting relevant authors\' records based on PMIDs')
    # fetch_records_handle = Entrez.efetch(db="pubmed", id=id_list, retmode="text", rettype="medline")
    # # need to read all the data from the handle and store it in a file because if we just read line by line from the
    # # generator and the internet connection is not strong, then we run into http errors:
    # # http.client.IncompleteRead: IncompleteRead(0 bytes read)
    # logging.getLogger('regular').info('storing authors\' records on local file')
    # with open("results.xml", "w") as out_handle:
    #     out_handle.write(fetch_records_handle.read(validate=True))
    # # the results are now in the results.xml file and the original handle has had all of its data extracted
    # # (so we close it)
    # fetch_records_handle.close()

    logging.getLogger('regular').info('reading result files')
    records_handle = open("results.xml")
    fetch_records = parse(records_handle)

    # initializing variables
    mesh_description_dict = obtain_descriptions()

    # PMID=PubMed Unique Identifier, TI=Title, AB=Abstract, AD=Affiliation, FAU=Full Author, MH=MeSH Terms,
    # PT=Publication Type
    # for more information, look at the abbreviations in the /template/abbreviations.txt file
    author_information = {'PMID': '', 'TI': '', 'AB': '', 'FAU': '', 'AU': '', 'MH': '', 'PT': '', 'AD': ''}
    author_list = list()
    affiliation_list = list()
    mesh_list = list()
    first_record = True

    # get the relevant information for each record
    for record_index, line in enumerate(fetch_records):
        logging.getLogger('regular').debug('line index = {0}'.format(record_index))

        # remove new line delimiter
        line = line.replace('\n', '')

        # skip if empty string
        if not line:
            continue

        # getting the key (PMID, TITLE, ABSTRACT, etc.) and its value
        key, value = line.split('- ', 1)
        # remove spaces
        key = key.replace(' ', '')

        # check if key is relevant to the information of interest
        if key not in author_information.keys():
            continue

        if key == 'PMID':
            # if it is not the first record, that means that it is a new record and therefore needs to reset all the
            # variables
            if not first_record:
                author_information['AU'] = author_list
                author_information['AD'] = affiliation_list
                author_information['MH'] = mesh_list
                logging.getLogger('regular').debug('authors\' information = {0}'.format(author_information))
                # function to print the author's information to the relevant files
                # output_author_information(author_information)
                author_information = {'PMID': '', 'TI': '', 'AB': '', 'FAU': '', 'AU': '', 'ROLE': '', 'MH': '',
                                      'PT': '', 'AD': ''}
                author_list = list()
                affiliation_list = list()
                mesh_list = list()

        # there might be multiple authors per PMID and therefore we need to add them to a list
        if key == 'FAU':
            author_list.append(value)
        # each author might have one or more affiliations
        elif key == 'AD':
            affiliation_list.append(value)
        # there might be multiple mesh terms
        elif key == 'MH':
            # some of the mesh terms might have an * that needs to be removed
            mesh_list.append(value.replace('*', ''))

        # add the authors' information
        author_information[key] = value

        # changing first record flag
        first_record = False

    logging.getLogger('line.regular.time.line').info('Recommendation System script finished running successfully.')
def main():
    # get the path for the input file argument
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--log", dest="logLevel", choices=['DEBUG', 'INFO', 'ERROR'], type=str.upper,
                        help="Set the logging level")
    args = parser.parse_args()

    logger_initialization(log_level=args.logLevel)
    logging.getLogger('line.regular.time.line').info('Running Recommendation System script')

    # import data from file
    logging.getLogger('regular').info('reading data from file')

    # Entrez (http://www.ncbi.nlm.nih.gov/Entrez) is a data retrieval system that provides users access to NCBI's
    # databases such as PubMed, GenBank, GEO, and many others
    # Use the mandatory email parameter so the NCBI can contact you if there is a problem
    Entrez.email = "*****@*****.**"  # Always tell NCBI who you are

    # logging.getLogger('regular').info('searching pubmed for the CHOP and UPENN authors')
    # handle = Entrez.esearch(db="pubmed", retmax=50000, idtype="esearch", mindate="2014/01/01", maxdate="2017/05/01",
    #                         term="Perelman School of Medicine[Affiliation] OR Children's Hospital of "
    #                              "Philadelphia[Affiliation] OR University of Pennsylvania School of "
    #                              "Medicine[Affiliation] OR School of Medicine University of "
    #                              "Pennsylvania[Affiliation]",
    #                         usehistory="y")
    # search_results = Entrez.read(handle)
    # handle.close()
    # # obtaining the list of relevant PMIDs
    # id_list = search_results["IdList"]
    #
    # # get all the records based on the PMIDs
    # logging.getLogger('regular').info('getting relevant authors\' records based on PMIDs')
    # fetch_records_handle = Entrez.efetch(db="pubmed", id=id_list, retmode="text", rettype="medline")
    # # need to read all the data from the handle and store it in a file because if we just read line by line from the
    # # generator and the internet connection is not strong, then we run into http errors:
    # # http.client.IncompleteRead: IncompleteRead(0 bytes read)
    # logging.getLogger('regular').info('storing authors\' records on local file')
    # with open("results.xml", "w") as out_handle:
    #     out_handle.write(fetch_records_handle.read(validate=True))
    # # the results are now in the results.xml file and the original handle has had all of its data extracted
    # # (so we close it)
    # fetch_records_handle.close()

    logging.getLogger('regular').info('reading result files')
    records_handle = open("results.xml")
    fetch_records = parse(handle=records_handle)

    # initializing variables
    mesh_description_dict = obtain_descriptions()

    # contains all the metadata of the medical information: PubMed unique Identifier number (PMID), Primary Medical
    # Subject Header (MeSH) and the description ID
    medical_record_file = open('record_results/medical_record.csv', 'w')
    medical_record_file.write('PMID,Primary MeSH,Description\n')
    # contains all the metadata elements on the author level: PubMed unique Identifier number (PMID), AuthorID (as a
    # combination of the author's last name, first name, and initials), institution: chop=0, Penn=1, Role: Chief
    # Author (CA), Ordinary Author (OA) or Principal Author (PA) and the author's affiliation
    author_record_file = open('record_results/author_record.csv', 'w')
    author_record_file.write('PMID,Author,Author_CHOP,Author_PENN,Role,Affiliation\n')
    # contains all the metadata elements on the paper level: PubMed unique Identifier number (PMID), Title, Abstract,
    # Year, Month, AuthorList, SubjectList, date
    paper_record_file = open('record_results/paper_record.csv', 'w')
    paper_record_file.write('PMID,Title,Abstract,Year,Month,Author List,Subject List,Date\n')

    # get the relevant information for each record
    for record_index, record in enumerate(fetch_records):
        logging.getLogger('regular').debug('record index = {0}'.format(record_index))

        pmid = record.get('PMID')
        title = record.get('TI')
        abstract = record.get('AB')
        authors = record.get('FAU')
        affiliations = record.get('AD')
        publication_type = record.get('PT')
        mesh_term = record.get('MH')
        date_created = record.get('EDAT')
        year, month = date_created.split('/')[:2]
        date = year + '/' + month

        logging.getLogger('regular').debug('pmid = {0}'.format(pmid))
        logging.getLogger('regular').debug('title = {0}'.format(title))
        logging.getLogger('regular').debug('abstract = {0}'.format(abstract))
        logging.getLogger('regular').debug('authors = {0}'.format(authors))
        logging.getLogger('regular').debug('affiliations = {0}'.format(affiliations))
        logging.getLogger('regular').debug('publication type = {0}'.format(publication_type))
        logging.getLogger('regular').debug('mesh term = {0}'.format(mesh_term))
        logging.getLogger('regular').debug('date created = {0}'.format(date_created))

        # assign the chief author, ordinary author or principal investigator role to each author
        roles = assign_roles(authors)
        # check and assign whether the authors belong to the CHOP or PENN organization
        chop_organization, penn_organization = assign_organization(affiliations)

        mesh_description = ''
        if mesh_term is None:
            mesh_term = ''
        else:
            term, mesh_description = convert_mesh_description(mesh_description_dict, mesh_term)
            mesh_term = ';'.join(mesh_term)

        # output information
        if mesh_description:
            msg = print_str(pmid, term, mesh_description)
            medical_record_file.write(msg)

        for author_index, organizations in enumerate(zip(chop_organization, penn_organization)):
            if 1 in organizations:
                msg = print_str(pmid, authors[author_index], organizations[0], organizations[1],
                                roles[author_index], affiliations[author_index])
                author_record_file.write(msg)

        authors = ';'.join(authors)
        msg = print_str(pmid, title, abstract, year, month, authors, mesh_term, date)
        paper_record_file.write(msg)

        if record_index == 10:
            break

    # closing all open files
    medical_record_file.close()
    author_record_file.close()
    paper_record_file.close()

    logging.getLogger('line.regular.time.line').info('Recommendation System script finished running successfully.')
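# A minimal sketch of the print_str helper the loop above relies on, assuming it
# simply joins its arguments into one comma-separated CSV row; the quoting rule
# is an illustrative assumption, not taken from the original code.
def print_str(*fields):
    escaped = []
    for field in fields:
        text = '' if field is None else str(field)
        # quote fields that contain commas or quotes so the CSV stays parseable
        if ',' in text or '"' in text:
            text = '"' + text.replace('"', '""') + '"'
        escaped.append(text)
    return ','.join(escaped) + '\n'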
    for d in devices:
        ret += "* {} running {} (mgt-ip: {}, status: {}, id: {})\n".format(
            d['series'], d['softwareType'], d['managementIpAddress'], d['reachabilityStatus'], d['id'])
    return ret


# Step 2: Add the command to our list of available commands
cf.create_command('list_devices', get_details_command)


def get_device_details(dnac, args):
    device_details = dnac.devices.get_device_by_id(**args).response
    ret = "### Details on Device \n"
    for k, v in device_details.items():
        ret += "* {} -> {}\n".format(k, v)
    return ret


cf.create_command('get_device_details', get_device_details)

while True:
    input_cmd = input("Please give a command: ")
    if input_cmd == "ESC":
        break
    cmd, args = parse(input_cmd)
    cf.run_command(cmd, args)
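# A minimal sketch of the parse helper assumed by the command loop above: the
# first word is the command name and any "key=value" tokens become keyword
# arguments, so args can be unpacked with ** as in get_device_by_id(**args).
# The exact token format is an illustrative assumption.
def parse(command_string):
    tokens = command_string.strip().split()
    if not tokens:
        return '', {}
    cmd = tokens[0]
    args = {}
    for token in tokens[1:]:
        if '=' in token:
            key, value = token.split('=', 1)
            args[key] = value
    # e.g. "get_device_details id=abc123" -> ('get_device_details', {'id': 'abc123'})
    return cmd, args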
def main():
    args = parse.parse()

    # set random seeds
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    np.random.seed(args.manual_seed)

    # prepare output directories
    base_dir = Path(args.out_dir)
    model_dir = base_dir.joinpath(args.model_name)
    if (args.resume or args.initialize) and not model_dir.exists():
        raise Exception("Model directory for resume does not exist")
    if not (args.resume or args.initialize) and model_dir.exists():
        c = ""
        while c != "y" and c != "n":
            c = input("Model directory already exists, overwrite?").strip()
        if c == "y":
            shutil.rmtree(model_dir)
        else:
            sys.exit(0)
    model_dir.mkdir(parents=True, exist_ok=True)

    summary_writer_dir = model_dir.joinpath("runs")
    summary_writer_dir.mkdir(exist_ok=True)
    save_path = model_dir.joinpath("checkpoints")
    save_path.mkdir(exist_ok=True)

    # prepare summary writer
    writer = SummaryWriter(summary_writer_dir, comment=args.writer_comment)

    # prepare data
    train_loader, val_loader, test_loader, args = load_dataset(args, flatten=args.flatten_image)

    # prepare flow model
    if hasattr(flows, args.flow):
        flow_model_template = getattr(flows, args.flow)

    flow_list = [flow_model_template(args.zdim) for _ in range(args.num_flows)]
    if args.permute_conv:
        convs = [flows.OneByOneConv(dim=args.zdim) for _ in range(args.num_flows)]
        flow_list = list(itertools.chain(*zip(convs, flow_list)))
    if args.actnorm:
        actnorms = [flows.ActNorm(dim=args.zdim) for _ in range(args.num_flows)]
        flow_list = list(itertools.chain(*zip(actnorms, flow_list)))
    prior = torch.distributions.MultivariateNormal(
        torch.zeros(args.zdim, device=args.device),
        torch.eye(args.zdim, device=args.device),
    )
    flow_model = NormalizingFlowModel(prior, flow_list).to(args.device)

    # prepare losses and autoencoder
    if args.dataset == "mnist":
        args.imshape = (1, 28, 28)
        if args.ae_model == "linear":
            ae_model = AutoEncoder(args.xdim, args.zdim, args.units, "binary").to(args.device)
            ae_loss = nn.BCEWithLogitsLoss(reduction="sum").to(args.device)
        elif args.ae_model == "conv":
            args.zshape = (8, 7, 7)
            ae_model = ConvAutoEncoder(
                in_channels=1,
                image_size=np.squeeze(args.imshape),
                activation=nn.Hardtanh(0, 1),
            ).to(args.device)
            ae_loss = nn.BCELoss(reduction="sum").to(args.device)
    elif args.dataset == "cifar10":
        args.imshape = (3, 32, 32)
        args.zshape = (8, 8, 8)
        ae_loss = nn.MSELoss(reduction="sum").to(args.device)
        ae_model = ConvAutoEncoder(in_channels=3, image_size=args.imshape).to(args.device)

    # setup optimizers
    ae_optimizer = optim.Adam(ae_model.parameters(), args.learning_rate)
    flow_optimizer = optim.Adam(flow_model.parameters(), args.learning_rate)

    total_epochs = np.max([args.vae_epochs, args.flow_epochs, args.epochs])

    if args.resume:
        checkpoint = torch.load(args.model_path, map_location=args.device)
        flow_model.load_state_dict(checkpoint["flow_model"])
        ae_model.load_state_dict(checkpoint["ae_model"])
        flow_optimizer.load_state_dict(checkpoint["flow_optimizer"])
        ae_optimizer.load_state_dict(checkpoint["ae_optimizer"])
        init_epoch = checkpoint["epoch"]
    elif args.initialize:
        checkpoint = torch.load(args.model_path, map_location=args.device)
        flow_model.load_state_dict(checkpoint["flow_model"])
        ae_model.load_state_dict(checkpoint["ae_model"])
    else:
        init_epoch = 1

    if args.initialize:
        raise NotImplementedError

    # training loop
    for epoch in trange(init_epoch, total_epochs + 1):
        if epoch <= args.vae_epochs:
            train_ae(
                epoch,
                train_loader,
                ae_model,
                ae_optimizer,
                writer,
                ae_loss,
                device=args.device,
            )
            log_ae_tensorboard_images(
                ae_model,
                val_loader,
                writer,
                epoch,
                "AE/val/Images",
                xshape=args.imshape,
            )
            # evaluate_ae(epoch, test_loader, ae_model, writer, ae_loss)

        if epoch <= args.flow_epochs:
            train_flow(
                epoch,
                train_loader,
                flow_model,
                ae_model,
                flow_optimizer,
                writer,
                device=args.device,
                flatten=not args.no_flatten_latent,
            )
            log_flow_tensorboard_images(
                flow_model,
                ae_model,
                writer,
                epoch,
                "Flow/sampled/Images",
                xshape=args.imshape,
                zshape=args.zshape,
            )

        if epoch % args.save_iter == 0:
            checkpoint_dict = {
                "epoch": epoch,
                "ae_optimizer": ae_optimizer.state_dict(),
                "flow_optimizer": flow_optimizer.state_dict(),
                "ae_model": ae_model.state_dict(),
                "flow_model": flow_model.state_dict(),
            }
            fname = f"model_{epoch}.pt"
            save_checkpoint(checkpoint_dict, save_path, fname)

    if args.save_images:
        p = Path(f"images/mnist/{args.model_name}")
        p.mkdir(parents=True, exist_ok=True)
        n_samples = 10000
        print("final epoch images")
        flow_model.eval()
        ae_model.eval()
        with torch.no_grad():
            z = flow_model.sample(n_samples)
            z = z.to(next(ae_model.parameters()).device)
            xcap = ae_model.decoder.predict(z).to("cpu").view(-1, *args.imshape).numpy()
        xcap = (np.rint(xcap) * int(255)).astype(np.uint8)
        for i, im in enumerate(xcap):
            imsave(f'{p.joinpath(f"im_{i}.png").as_posix()}', np.squeeze(im))

    writer.close()
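# A minimal sketch of reloading one of the checkpoints written above for
# sampling only, assuming the same flow and autoencoder objects have already
# been constructed as in main(); the checkpoint path is an illustrative
# assumption.
import torch

def load_for_sampling(flow_model, ae_model, path="checkpoints/model_100.pt", device="cpu"):
    checkpoint = torch.load(path, map_location=device)
    # these keys match checkpoint_dict saved in the training loop above
    flow_model.load_state_dict(checkpoint["flow_model"])
    ae_model.load_state_dict(checkpoint["ae_model"])
    flow_model.eval()
    ae_model.eval()
    return checkpoint["epoch"]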
def __init__(self, pFile):
    self.filename = pFile.name
    self.root = parse.parse(Reader(File(pFile)))