Grouped subsets operations
Task:Calculate the daily inventory status of various goods in the specified time period.
Python
1 | import pandas as pd |
2 | import numpy as np |
3 | starttime = '2015-01-01' |
4 | endtime = '2015-12-31' |
5 | stock_data = pd.read_csv('E:\\txt\\stocklog.csv',sep='\t') |
6 | stock_data['DATE']=pd.to_datetime(stock_data['DATE']) |
7 | stock_data = stock_data[(stock_data['DATE']>=starttime)&(stock_data['DATE']<=endtime)] |
8 | stock_data['ENTER']=stock_data['QUANTITY'][stock_data['INDICATOR']!='ISSUE'] |
9 | stock_data['ISSUE']=stock_data['QUANTITY'][stock_data['INDICATOR']=='ISSUE'] |
10 | stock_g = stock_data[['STOCKID','DATE','ENTER','ISSUE']].groupby(by=['STOCKID','DATE'],as_index=False).sum() |
11 | stock_gr = stock_g.groupby(by='STOCKID',as_index = False) |
12 | date_df = pd.DataFrame(pd.date_range(starttime,endtime),columns=['DATE']) |
13 | stock_status_list = [] |
1415 | for index,group in stock_gr: |
16 | date_df['STOCKID']=group['STOCKID'].values[0] |
17 | stock_status = pd.merge(date_df,group,on=['STOCKID','DATE'],how='left') |
18 | stock_status = stock_status.sort_values(['STOCKID','DATE']) |
19 | stock_status['OPEN']=0 |
20 | stock_status['CLOSE']=0 |
21 | stock_status['TOTAL']=0 |
22 | stock_status = stock_status.fillna(0) |
23 | stock_value = stock_status[['STOCKID','DATE','OPEN','ENTER','TOTAL','ISSUE','CLOSE']].values |
24 | open = 0 |
25 | for value in stock_value: |
26 | value[2] = open |
27 | value[4] = value[2] + value[3] |
28 | value[6] = value[4] - value[5] |
29 | open = value[6] |
30 | stock = pd.DataFrame(stock_value,columns = ['STOCKID','DATE','OPEN','ENTER','TOTAL','ISSUE','CLOSE']) |
31 | stock_status_list.append(stock) |
32 | stock_status = pd.concat(stock_status_list,ignore_index=True) |
print(stock_status) |
esProc
A | B | |
1 | =file("E:\\txt\\stocklog.csv").import@t() | |
2 | =A1.select(DATE>=date("2015-01-01") && DATE<=date("2015-12-31")) | |
3 | =A2.groups(STOCKID,DATE; sum(if(INDICATOR=="ISSUE",QUANTITY,0)):ISSUE, sum(if(INDICATOR!="ISSUE",QUANTITY,0)):ENTER) |
|
4 | =periods(start,end) | |
5 | for A3.group(STOCKID) | =A5.align(A4,DATE) |
6 | >b=c=0 | |
7 | =B5.new(A5.STOCKID:STOCKID,A4(#):DATE,c:OPEN,ENTER, (b=c+ENTER):TOTAL,ISSUE, (c=b-ISSUE):CLOSE) |
|
8 | >B7.run(ENTER=ifn(ENTER,0),ISSUE=ifn(ISSUE,0)) | |
9 | =@|B7 |
esProc 9 lines of code complete the task of Python 32 lines of code, and when you read the code later on, esProc is easier to understand.