Exemplo n.º 1
0
#         'delay': 10,
#         'search': 'fmri',
#         'mode': 'direct',  # PLoS sends nice usable XML directly
#         'min_pmid': None
#     },
#     'Journal of Neuroscience': {
#         'delay': 20,
#         'mode': 'browser',
#         'search': 'fmri',
#         'min_pmid': None,
#         'limit': 100  # We can limit to only N new articles
#     }
# }

# Turn on verbose (debug-level) logging so every scraping step is reported.
ace.set_logging_level('debug')

# Make sure the temporary download directory exists before scraping into it.
output_dir = './tmp/articles'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Scraper instance that will save downloaded article HTML under output_dir.
scraper = Scraper(output_dir)

# PubMed IDs of the articles we want to fetch.
ids = [
    "17427209", "21966352", "17475792", "21300162",
    "20188841", "21382560", "14960288", "21232548",
    "20809855", "19651151", "21686071", "19875675",
    "20816974", "21782354", "20933020", "18586110",
    "21499511", "22137505", "19766936", "19580877",
]
Exemplo n.º 2
0
        'delay': 10,
        'search': 'fmri',
        'mode': 'direct',  # PLoS sends nice usable XML directly
        'min_pmid': None
    },
    'Journal of Neuroscience': {
        'delay': 20,
        'mode': 'browser',
        'search': 'fmri',
        'min_pmid': None,
        'limit': 100  # We can limit to only N new articles
    }
}

# Verbose output
ace.set_logging_level('debug')

# Create temporary output dir
output_dir = '/tmp/articles'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)  # was tab-indented; file convention is 4 spaces

# Initialize Scraper — reuse output_dir rather than repeating the literal path
scraper = Scraper(output_dir)

# Loop through journals and retrieve each one's articles using its settings
for j, settings in journals.items():
    scraper.retrieve_journal_articles(j, **settings)


Exemplo n.º 3
0
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.

import ace
from ace import database

# Info-level logging prints progress while articles are processed.
ace.set_logging_level("info")

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "/Users/tal/tmp/html/*.html"

# Build the database, ingest every matching HTML article, then report counts.
db = database.Database("example_db.sqlite")
db.add_articles(PATH_TO_FILES)
db.print_stats()
Exemplo n.º 4
0
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.

import ace
from ace import database

# Info-level logging prints progress while articles are processed.
ace.set_logging_level('info')

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "/Users/tal/tmp/html/*.html"

# Name of the SQLite file the Database will be created in.
DB_FILE = 'example_db.sqlite'

# Build the database, ingest the HTML articles, and print summary statistics.
db = database.Database(DB_FILE)
db.add_articles(PATH_TO_FILES)
db.print_stats()
Exemplo n.º 5
0
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.

import os

import ace
from ace import database
from ace import export
from ace.database import Database, Article, Table, Activation


# Info-level logging prints progress while the DB is built and exported.
ace.set_logging_level('info')

# Move into the ACE project directory.  (The original line was a bare shell
# `cd ...` command pasted into the script, which is a SyntaxError in Python.)
os.chdir('/home/jflournoy/Documents/NeuroDebian/code/metanal/ACE')

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "./tmp/articles/html/ids/*.html"

db = database.Database(adapter='sqlite', db_name='sqlite:///example_db.sqlite')
# db.add_articles(PATH_TO_FILES)
db.print_stats()

# Dump the activations (with group/size/statistic columns) to a CSV file.
export.export_database(db, 'exported_data_2.csv', groups=True, size=True, statistic=True)

for article in db.session.query(Article).filter(Article.tables.any()).all():
	for t in article.tables:
		for p in t.activations:
			if isinstance(p.size, basestring):
                p.size_tmp = [p.size]
            elif p.size is None:
                p.size_tmp = []