Example #1
    ]
    list_sources = [list_header]

    with open(listNetAuthors, "rt") as textfile:
        author_lines = textfile.readlines()
        filtered_authors = [x.strip() for x in author_lines if x.strip()]

    processed_source = []

    for dirpath, dirs, files in os.walk(dataFolder):
        for filename in files:
            if filename[-4:].upper() == '.CSV':
                csvFile = os.path.join(dirpath, filename)
                # For each CSV file
                print('Processing file ' + csvFile)
                table = csv_tools.read_csv_table(csvFile)
                headerTable = table[0]
                if all(col in headerTable for col in (
                        'Authors', 'Source title', 'Abbreviated Source Title',
                        'Publisher', 'Conference name')):
                    author_index = headerTable.index('Authors')
                    sourceTitle_index = headerTable.index('Source title')
                    abbr_sourceTitle_index = headerTable.index(
                        'Abbreviated Source Title')
                    publisher_index = headerTable.index('Publisher')
                    conference_index = headerTable.index('Conference name')
                    access_index = [
                        sourceTitle_index, abbr_sourceTitle_index,
                        publisher_index, conference_index
                    ]
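                    # access_index presumably groups the source-related column
                    # indices (title, abbreviation, publisher, conference) so
                    # those fields can be copied together further below.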
                    for row in table[1:]:
                        if row[sourceTitle_index] not in processed_source:
                            authors_split = row[author_index].split(',')
Example #2
        exportFile = 'list_citations.csv'
    if len(sys.argv) > 3:
        listReferences = str(sys.argv[3])
    else:
        listReferences = 'list_references.csv'
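    # Presumed CLI pattern (argv[1] is handled above this excerpt and is assumed
    # to supply dataFolder): <script> <dataFolder> [exportFile] [listReferences]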

    #get_citations(dataFolder, exportFile, listReferences)
    list_header = [
        'ID', 'Number of citations in selected database',
        'List ID of citations', 'Title', 'Authors', 'Source title',
        'Abbreviated Source Title', 'Publisher', 'Conference name', 'Year',
        'Link'
    ]
    list_citations = [list_header]

    refTable = csv_tools.read_csv_table(listReferences)
    # Note: header of refTable is ['ID', 'Number of references', 'Number of references outside selected database', 'Number of self-cited references', 'List ID of references', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link']
    number_of_citations = [0] * len(refTable)
    id_of_citing_papers = [
        [] for _ in range(len(refTable))
    ]  # initialize array of empty lists, each row contains list of papers that cite this one
    # Note: id_of_citing_papers = [[]] * len(refTable) would also appear to create a list of empty lists, but every entry would reference the same list object, so appending to one item would modify all of them.
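    # A minimal illustration of that aliasing pitfall (hypothetical values, not
    # part of this script's data flow):
    #   shared = [[]] * 3
    #   shared[0].append(1)          # -> [[1], [1], [1]]  (one shared list)
    #   separate = [[] for _ in range(3)]
    #   separate[0].append(1)        # -> [[1], [], []]    (independent lists)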

    for row in refTable[1:]:
        paper_id = str(row[0])
        #print('Processing paper: ', paper_id)
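        # Per the header note above, row[0] is the paper ID and row[4] holds the
        # comma-separated 'List ID of references' column.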
        paper_ref = row[4].split(',')
        if paper_ref != ['']:  # the paper lists at least one reference ID
            for ref in paper_ref:
Example #3
        exportFile = str(sys.argv[2])
    else:
        exportFile = 'list_papers.csv'
    if len(sys.argv) > 3:
        listSource = str(sys.argv[3])
    else:
        listSource = 'list_sources.csv'

    #get_papers(dataFolder, exportFile)
    list_header = [
        'ID', 'Title', 'Authors', 'Source title', 'Abbreviated Source Title',
        'Publisher', 'Conference name', 'Year', 'Link', 'Rating'
    ]
    list_papers = [list_header]

    sourceTable = csv_tools.read_csv_table(listSource)
    # Note: header of sourceTable is ['ID', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Rating']
    sourceTable_transpose = csv_tools.transpose_table(sourceTable)
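    # transpose_table is assumed to behave like the usual zip-based transpose,
    # roughly [list(col) for col in zip(*table)], so each entry of
    # sourceTable_transpose is one column of listSource (e.g. index 1 is the
    # 'Source title' column per the header note above).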

    for dirpath, dirs, files in os.walk(dataFolder):
        for filename in files:
            if filename[-4:].upper() == '.CSV':
                csvFile = os.path.join(dirpath, filename)
                # For each CSV file
                print('Processing file ' + csvFile)
                table = csv_tools.read_csv_table(csvFile)
                headerTable = table[0]
                if all(col in headerTable for col in (
                        'Authors', 'Title', 'Year', 'Source title', 'DOI',
                        'Publisher')):
                    title_index = headerTable.index('Title')
                    author_index = headerTable.index('Authors')
                    sourceTitle_index = headerTable.index('Source title')
Example #4
        exportFile = 'list_references.csv'
    if len(sys.argv) > 3:
        listPaper = str(sys.argv[3])
    else:
        listPaper = 'list_papers.csv'

    #get_papers(dataFolder, exportFile)
    list_header = [
        'ID', 'Number of references',
        'Number of references outside selected database',
        'Number of self-cited references', 'List ID of references', 'Title',
        'Authors', 'Source title', 'Publisher', 'Year', 'Link'
    ]
    list_references = [list_header]

    paperTable = csv_tools.read_csv_table(listPaper)
    # Note: header of paperTable is ['ID', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link']
    paperTable_transpose = csv_tools.transpose_table(paperTable)
    for k in range(1, len(paperTable_transpose[2])):
        authors_split = paperTable_transpose[2][k].split(',')
        paperTable_transpose[2][k] = [item.strip() for item in authors_split]
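    # After this loop, each cell of the 'Authors' column (paperTable_transpose[2],
    # see the header note above) is a list of stripped names, e.g. a hypothetical
    # 'Smith J., Doe A.' becomes ['Smith J.', 'Doe A.'].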

    for dirpath, dirs, files in os.walk(dataFolder):
        for filename in files:
            if filename[-4:].upper() == '.CSV':
                csvFile = os.path.join(dirpath, filename)
                # For each CSV file
                table = csv_tools.read_csv_table(csvFile)
                headerTable = table[0]
                if 'Authors' in headerTable and 'Title' in headerTable and 'References' in headerTable:
                    title_index = headerTable.index('Title')
Example #5
    else:
        exportFile = 'list_sources_citations_net.csv'
    if len(sys.argv) > 3:
        listNetAuthors = str(sys.argv[3])
    else:
        listNetAuthors = 'input_net_authors.txt'

    #get_sources_citations_net(listCitations, exportFile, listNetAuthors)
    list_header = [
        'ID', 'Source title', 'Abbreviated Source Title', 'Publisher',
        'Conference name', 'Rating'
    ]
    list_sources = [list_header]
    
    with open(listNetAuthors, "rt") as textfile:
        author_lines = textfile.readlines()
        filtered_authors = [x.strip() for x in author_lines if x.strip()]
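        # filtered_authors now holds one entry per non-blank line of the input
        # file, presumably one author name of interest per line.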
    
    citeTable = csv_tools.read_csv_table(listCitations)
    headerTable = citeTable[0]
    # Note that header of citation table is ['ID', 'Number of citations in selected database', 'List ID of citations', 'Title', 'Authors', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Year', 'Link']


    processed_source = []
    
    author_index = headerTable.index('Authors')
    sourceTitle_index = headerTable.index('Source title')
    abbr_sourceTitle_index = headerTable.index('Abbreviated Source Title')
    publisher_index = headerTable.index('Publisher')
    conference_index = headerTable.index('Conference name')
    access_index = [
        sourceTitle_index, abbr_sourceTitle_index, publisher_index,
        conference_index
    ]

    for row in citeTable[1:]:
        # Collect the journal/source information associated with the row's authors
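        # (The remainder of this loop is truncated in the excerpt; judging from
        # the similar loop in Example #1, it presumably splits row[author_index],
        # keeps rows whose authors appear in filtered_authors, and appends the
        # columns listed in access_index for sources not yet in processed_source.)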