/
gougou.py
61 lines (56 loc) · 1.69 KB
/
gougou.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import time
import sys
def _is_integer_id(value):
    """Return True when *value* is a whole number that can serve as an ID."""
    if pd.api.types.is_integer(value):
        return True
    # Blank cells make pandas load an otherwise-integer column as floats,
    # so integer-valued floats are accepted too (NaN and inf are rejected).
    return pd.api.types.is_float(value) and float(value).is_integer()


def _load_ids(path):
    """Read one workbook and return its sorted, unique integer IDs."""
    frame = pd.read_excel(path, sheet_name='Sheet1', header=None,
                          usecols=[8, 9])
    frame.columns = ['ID', 'name']
    # Rows missing either the ID or the name are ignored.
    ids = frame.dropna()['ID']
    # Filter *before* sorting: ordering a column that mixes numbers and
    # text (e.g. a header cell) raises TypeError on Python 3.
    ids = ids[ids.map(_is_integer_id).astype(bool)]
    # Normalise to plain ints so 5 and 5.0 match across workbooks.
    ids = ids.map(int)
    return ids.drop_duplicates().sort_values().reset_index(drop=True)


def CompareXlsx(filename):
    """Report the integer IDs that appear in every given Excel workbook.

    For each workbook, the columns at zero-based positions 8 and 9 of
    sheet ``Sheet1`` are read as ``ID`` and ``name``.  Rows with a missing
    value in either column, non-integer IDs and duplicate IDs are
    discarded.  The IDs common to all workbooks are printed and written
    to ``compare.csv`` in the current directory.

    Args:
        filename: Iterable of paths to the workbooks to compare.  At
            least two paths are required; with fewer, a message is
            printed and nothing is read or written.

    Returns:
        None.  Results are reported on stdout and in ``compare.csv``.
    """
    paths = list(filename)
    if len(paths) < 2:
        # A comparison needs two inputs; bail out before touching any file.
        print('Not enough input file')
        return

    id_lists = [_load_ids(path) for path in paths]

    # Start from the IDs of the second workbook that also occur in the first.
    common = id_lists[1][id_lists[1].isin(id_lists[0])]
    for other in id_lists[2:]:
        # Build the mask over `common` itself so that it is aligned with
        # the Series being filtered.
        common = common[common.isin(other)]
        if common.empty:
            print('Output:')
            print('No Same data')
            return
    # NOTE(review): with exactly two workbooks and no overlap, the empty
    # result is still printed and written below (original behaviour kept).

    common = common.reset_index(drop=True)
    same_count = len(common)
    print('Output:')
    print('SAME:')
    # Lift the row limit only while printing so every ID is shown.
    with pd.option_context('display.max_rows', None):
        print(common)
    print('NUM:%d' % same_count)
    common.to_csv('compare.csv', index=False)
if __name__ == "__main__":
    # Command-line entry point: compare the workbooks named on the command
    # line (or the two default files when none are given) and report how
    # long the run took.
    #
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    if len(sys.argv) <= 1:
        # No arguments: fall back to the default pair of workbooks.
        filename = ['partly.xlsx', 'radical.xlsx']
    elif len(sys.argv) == 2:
        # A single workbook cannot be compared with anything.
        # NOTE(review): this error path exits with status 0; kept as-is
        # so existing callers see the same exit code.
        print('Not enough input file')
        sys.exit(0)
    else:
        filename = sys.argv[1:]
    print('Compare FILE is')
    print(filename)
    CompareXlsx(filename)
    end = time.perf_counter()
    print("The function run time is : %.03f seconds" % (end - start))