Exemplo n.º 1
0
def random_sample_by_year(year):
    """Return one randomly chosen sample whose build timestamp lies in ``year``.

    The samples built within the calendar year are counted first, then one of
    them is selected through a random ``OFFSET`` into the same result set.

    :param year: year to draw from; anything ``int()`` can convert.
    :return: ``jsonify`` response of the sample as rendered by ``JsonFactory``.
    :raises InvalidUsage: if ``year`` is not an integer, is below 1970 or
        above 3000, or if no sample was built in that year (status code 404).
    """
    try:
        year = int(year)
    except (TypeError, ValueError):
        raise InvalidUsage('Given year is not an integer')
    if year < 1970:
        raise InvalidUsage('Given year should be above 1970')
    if year > 3000:
        raise InvalidUsage('Given year should be below 3000')

    # Half-open interval [Jan 1st of year, Jan 1st of the following year).
    # The bounds are handed to the driver as query parameters instead of
    # being formatted into the SQL text.
    bounds = ('%i-01-01 00:00:00' % year, '%i-01-01 00:00:00' % (year + 1))
    with get_db().cursor() as cursor:
        cursor.execute(
            '''
            SELECT COUNT(*) FROM sample
            WHERE (%s <= build_timestamp) AND (build_timestamp < %s)
            ''', bounds)
        count = cursor.fetchall()[0][0]
        if not count:
            # random.randint(0, -1) would raise ValueError on an empty year.
            raise InvalidUsage(
                'No samples found for given year', status_code=404)
        rand = random.randint(0, count - 1)
        cursor.execute(
            '''
            SELECT hash_sha256 FROM sample
            WHERE (%s <= build_timestamp) AND (build_timestamp < %s)
            LIMIT 1 OFFSET %s
            ''', bounds + (rand, ))
        rows = cursor.fetchall()
        if not rows:
            # The table may have shrunk between the two queries.
            raise InvalidUsage(
                'No samples found for given year', status_code=404)
        random_sha256 = rows[0][0]
        return jsonify(JsonFactory().from_sample(
            get_sample_repository().by_hash_sha256(random_sha256)))
Exemplo n.º 2
0
def bulk():
    """Return the JSON form of every sample referenced by an uploaded hash file.

    The request must carry an ``X-ApiKey`` header, which is handed to
    ``validate_api_key``, and an uploaded file field named ``hashes``.

    :return: ``jsonify`` response holding one ``JsonFactory`` rendering per
        sample.
    :raises InvalidUsage: if the header is missing or empty, or if the
        ``hashes`` file field is absent.
    """
    key = request.headers.get('X-ApiKey')
    if not key:
        raise InvalidUsage('API key required for this action')
    validate_api_key(key)

    uploads = request.files
    if 'hashes' not in uploads:
        raise InvalidUsage('Field "hashes" does not exist')

    # Resolve the uploaded hashes to ids, then load the matching samples.
    load_by_ids = get_sample_repository().by_ids
    sample_ids = get_sample_repository().ids_by_hashes(uploads['hashes'])

    rendered = []
    for sample in load_by_ids(sample_ids):
        rendered.append(JsonFactory().from_sample(sample))
    return jsonify(rendered)
Exemplo n.º 3
0
def get_sample(hash):
    """Look up a single sample by its hex-encoded hash.

    The hash type is chosen from the string length: 64 characters use the
    SHA-256 lookup, 32 the MD5 lookup and 40 the SHA-1 lookup.

    :param hash: hex string identifying the sample.
    :return: ``jsonify`` response of the sample as rendered by ``JsonFactory``.
    :raises InvalidUsage: (status 400) if the hash is empty, contains
        non-hex characters, or has an unsupported length.
    :raises NotFound: if the repository lookup returns ``None``.
    """
    if not hash:
        raise InvalidUsage('Hash empty', status_code=400)
    if any(char not in string.hexdigits for char in hash):
        raise InvalidUsage('Hash may only contain hex chars', status_code=400)

    # Repository method to use for each supported hash length.
    finder_by_length = {
        64: 'by_hash_sha256',
        32: 'by_hash_md5',
        40: 'by_hash_sha1',
    }
    finder_name = finder_by_length.get(len(hash))
    if finder_name is None:
        raise InvalidUsage(
            'Hash is not of any of the following lengths: 64, 32, 40',
            status_code=400)

    sample = getattr(get_sample_repository(), finder_name)(hash)
    if sample is None:
        raise NotFound()
    return jsonify(JsonFactory().from_sample(sample))
Exemplo n.º 4
0
def random_samples(count):
    """Return ``count`` samples obtained from the repository's ``random_by_id``.

    Requests for more than 50 samples must carry an ``X-ApiKey`` header; the
    key is passed to ``validate_api_key`` and must be known to the API key
    repository.

    :param count: number of samples requested; anything ``int()`` can convert.
    :return: ``jsonify`` response holding one ``JsonFactory`` rendering per
        sample.
    :raises InvalidUsage: if ``count`` is not an integer, is not positive,
        or exceeds 50 without an existing API key.
    """
    try:
        count = int(count)
    except ValueError:
        raise InvalidUsage('Given count is not an integer')
    if count < 1:
        raise InvalidUsage('Given count should be above 0')

    needs_api_key = count > 50
    if needs_api_key:
        key = request.headers.get('X-ApiKey')
        if not key:
            raise InvalidUsage(
                'Given count should be below 50 or you have to pass an API key'
            )
        validate_api_key(key)
        key_known = get_api_key_repository().exists(key)
        if not key_known:
            raise InvalidUsage('API key does not exist')

    rendered = []
    for sample in get_sample_repository().random_by_id(count):
        rendered.append(JsonFactory().from_sample(sample))
    return jsonify(rendered)
Exemplo n.º 5
0
def newest_samples():
    """Return the result of the repository's ``newest(10)`` query as JSON.

    :return: ``jsonify`` response holding one ``JsonFactory`` rendering per
        sample.
    """
    rendered = []
    for sample in get_sample_repository().newest(10):
        rendered.append(JsonFactory().from_sample(sample))
    return jsonify(rendered)
Exemplo n.º 6
0
def get_samples_by_section(sha256):
    """Return all samples the repository finds for a section hash.

    :param sha256: section hash; checked by ``validate_sha256`` before the
        lookup.
    :return: ``jsonify`` response holding one ``JsonFactory`` rendering per
        sample.
    """
    validate_sha256(sha256)
    matches = get_sample_repository().by_section_hash(sha256)
    rendered = []
    for match in matches:
        rendered.append(JsonFactory().from_sample(match))
    return jsonify(rendered)
Exemplo n.º 7
0
# Run every enabled extractor against a fresh Sample, then build the output
# document and copy selected task metadata into it.
timer.mark('read_extractors')
sample = Sample()
# The flag handed to get_extractors is truthy when --r2 was given or the
# task is of type R2Disassembly.
use_r2 = args.r2 or (task and task.type == 'R2Disassembly')
extractors = get_extractors(use_r2).values()
logger.debug('Enabled Extractors: %s' % extractors)
for extractor in extractors:
    timer.mark('extractor_%s' % extractor.__class__.__name__)
    try:
        extractor.extract(sample)
    except Exception as error:
        # Best effort: a failing extractor is logged (and reported to sentry
        # when configured) without stopping the remaining extractors.
        logger.error('%s' % error)
        if sentry:
            sentry.captureException()

timer.mark('output')
out = JsonFactory(args.filter).from_sample(sample)
if task:
    out['task_id'] = task.id
    # Optional payload entries are copied through only when present.
    for payload_key in ('source_id', 'tags', 'file_names'):
        if payload_key in task.payload.keys():
            out[payload_key] = task.payload[payload_key]

if args.server:
    import requests

    r = requests.post(
        kurasuta_api.get_sha256_url(sample.hash_sha256),
        data=json.dumps(out, cls=DateTimeEncoder),
Exemplo n.º 8
0
# Counters, both starting at zero (presumably updated by code further down;
# not visible from here).
imported = skipped = 0

# PostgreSQL connection string comes from the environment.
logger.debug('Connecting to postgres database...')
db = psycopg2.connect(os.environ['POSTGRES_DATABASE_LINK'])

# ArangoDB URL and credentials come from the environment as well.
logger.debug('Connecting to arango database...')
arango_settings = dict(
    arangoURL=os.environ['ARANGODB_URL'],
    username=os.environ['ARANGODB_USERNAME'],
    password=os.environ['ARANGODB_PASSWORD'],
)
arango_connection = Connection(**arango_settings)

arango_database = arango_connection['kurasuta']
sample_repository = SampleRepository(db)
json_factory = JsonFactory()

# One hash per line; surrounding whitespace is stripped and blank lines are
# ignored.
hashes = []
with open(hash_file) as fp:
    for raw_line in fp:
        stripped = raw_line.strip()
        if stripped:
            hashes.append(stripped)
logger.info('Found %i hashes' % len(hashes))


def exists_in_collection(key, collection):
    """Report whether ``collection[key]`` can be looked up.

    :param key: key to look up.
    :param collection: object supporting ``collection[key]`` that raises
        ``KeyError`` for a missing key.
    :return: ``True`` if the lookup succeeds, ``False`` if it raises
        ``KeyError``.
    """
    # this can go away as soon as https://github.com/tariqdaouda/pyArango/issues/119 is resolved
    try:
        # Only the success or failure of the lookup matters; the value itself
        # is discarded.
        collection[key]
    except KeyError:
        return False
    else:
        return True

Exemplo n.º 9
0
# Dump every sample from the database to a JSON-lines file, skipping the ids
# that a previous run already wrote to that file.
parser.add_argument('target_file_name')
args = parser.parse_args()

target_file_name = args.target_file_name
# Ids already present in the target file. A set keeps the per-row membership
# test below O(1) instead of scanning a list for every database row.
existing_ids = set()
if os.path.exists(target_file_name):
    with open(target_file_name, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            existing_ids.add(json.loads(line)['id'])

db = psycopg2.connect(os.environ['POSTGRES_DATABASE_LINK'])
sample_repository = SampleRepository(db)
json_factory = JsonFactory()
with db.cursor() as cursor:
    logger.info('Selecting all ids...')
    cursor.execute('SELECT id FROM sample')
    logger.info('Found %i ids.' % cursor.rowcount)
    for row in cursor:
        sample_id = row[0]
        if sample_id in existing_ids:
            continue
        samples = sample_repository.by_ids([sample_id])
        logger.info('Dumping sample with id %s...' % sample_id)
        # The file is reopened in append mode for every id, so each dumped
        # sample is written out and closed before the next one is fetched.
        with open(target_file_name, 'a') as fp:
            for sample in samples:
                fp.write('%s\n' % json.dumps(json_factory.from_sample(sample)))
logger.info('All done.')