forked from darrenhon/oshpd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sampling.py
36 lines (30 loc) · 855 Bytes
/
sampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import ocsv
import sys
import random
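# Usage: python sampling.py <input> <public output> <private output>
# argv[1] input file: comma-separated, first line is a header that includes a PID column
# argv[2] public output file: receives the header plus all rows whose PID was not sampled
# argv[3] private output file: receives the header plus all rows of the sampled PIDs (1/10 of distinct PIDs)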
# read input file the first round to load all PIDs
# `with` guarantees the input handle is closed even if parsing a line raises.
with open(sys.argv[1], 'r') as fin:
    # The header line yields `col`; col['PID'] is used below as the index of
    # the PID field within a comma-split data line.
    col = ocsv.getColumns(fin.readline())
    pids = set()
    # NOTE(review): presumably runFunc invokes the callback once per remaining
    # line of `fin` -- confirm against ocsv.runFunc.
    ocsv.runFunc(fin, lambda line: pids.add(line.strip().split(',')[col['PID']]))
# sample private PIDs: one tenth (rounded down) of the distinct PIDs
# random.sample() requires a sequence; passing a set is deprecated since
# Python 3.9 and raises TypeError on 3.11+, so sample from a sorted list.
# Sorting also gives a stable population order, so a seeded run is repeatable.
pripids = set(random.sample(sorted(pids), len(pids) // 10))
# read input file the second round to divide into public and private
def write(line):
    """Write `line` to the private output if its PID is sampled, else to the public output.

    The PID is the field at index col['PID'] of the stripped, comma-split line.
    Relies on the module-level `col`, `pripids`, `fpri` and `fpub`.
    """
    if line.strip().split(',')[col['PID']] in pripids:
        fpri.write(line)
    else:
        fpub.write(line)


# A single `with` closes (and flushes) all three files even if a line fails
# to parse part-way through; files are opened in the same order as before.
with open(sys.argv[2], 'w') as fpub, \
        open(sys.argv[3], 'w') as fpri, \
        open(sys.argv[1], 'r') as fin:
    # The header line is copied verbatim into both outputs.
    line = fin.readline()
    fpub.write(line)
    fpri.write(line)
    # NOTE(review): presumably runFunc calls write() once per remaining line
    # of `fin` -- confirm against ocsv.runFunc.
    ocsv.runFunc(fin, write)