Is there a retain function in python? I want to transfer SAS code to python
Question:
I want to port some SAS code to Python, but I cannot find an equivalent of the SAS RETAIN statement in Python.
The data is like :
type_id amount
1 100
1 200
1 400
2 0
1 200
1 300
2 0
1 150
What I want is that, when type_id = 2, the amount is set to the negative of the previous row's amount. After applying the retain logic, the data should look like this:
type_id amount
1 100
1 200
1 400
2 -400
1 200
1 300
2 -300
1 150
The SAS code is :
/* Build data set B from A: every type_id = 2 row receives the negated
   amount of the most recent row whose type_id is not 2. */
data B;
set A;
/* RETAIN keeps tempvar across data step iterations; it starts at 0. */
retain tempvar 0;
/* type_id = 2: overwrite amount with the remembered value.
   Any other row: remember the negative of its amount for later. */
if type_id = 2
then amount = tempvar;
else tempvar = -amount;
/* tempvar is a working variable only; keep it out of B. */
drop tempvar;
run;
Does anyone have any idea about how to do this in python?
Thanks!
Answers:
If I understand correctly (IIUC):
df
type_id amount
0 1 100
1 1 200
2 1 400
3 2 0
4 1 200
5 1 300
6 2 0
7 1 150
def retain(df):
    """Set ``amount`` on each ``type_id == 2`` row to minus the previous row's amount.

    This reproduces the SAS data step
    ``retain tempvar 0; if type_id = 2 then amount = tempvar; else tempvar = -amount;``
    for data in which every ``type_id == 2`` row directly follows a regular row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``type_id`` and ``amount`` columns. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    is_type_2 = df['type_id'] == 2
    # Negate, then shift down one row, so each row sees the negated amount of
    # the row above it. fill_value=0 stands in for SAS's initial tempvar of 0
    # when the very first row has type_id == 2, and avoids introducing NaN.
    negated_previous = (-df['amount']).shift(fill_value=0)
    # .loc replaces the .ix indexer (removed in pandas 1.0). Working from a
    # local Series also avoids adding and dropping a temporary column on df.
    df.loc[is_type_2, 'amount'] = negated_previous[is_type_2]
    return df
retain(df)
type_id amount
0 1 100.0
1 1 200.0
2 1 400.0
3 2 -400.0
4 1 200.0
5 1 300.0
6 2 -300.0
7 1 150.0
Alternatively:
def retain(df):
    """Compact variant: negate the previous row's amount on ``type_id == 2`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``type_id`` and ``amount`` columns. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    type_2_rows = df['type_id'] == 2
    # A single .loc assignment on the frame itself: the old chained form
    # (df.amount.ix[...] = ...) relied on the removed .ix indexer and is not
    # guaranteed to write back to df. fill_value=0 keeps a leading
    # type_id == 2 row at 0 (the SAS initial value) instead of NaN.
    df.loc[type_2_rows, 'amount'] = (-df['amount']).shift(fill_value=0)[type_2_rows]
    return df
retain(df)
type_id amount
0 1 100.0
1 1 200.0
2 1 400.0
3 2 -400.0
4 1 200.0
5 1 300.0
6 2 -300.0
7 1 150.0
# another idea to try
class retain_memo(object):
    """One-slot memory cell holding the most recently stored value."""

    def __init__(self):
        # Nothing has been stored yet.
        self.value = None

    def set_get(self, u):
        """Store ``u`` as the current value and return it."""
        self.value = u
        return self.value
def retain(series):
    """Carry the last non-null value forward over nulls (SAS-style retain).

    Parameters
    ----------
    series : pandas.Series or iterable
        Values to fill; anything that is not a Series is wrapped in one.

    Returns
    -------
    pandas.Series
        A new Series in which each null is replaced by the closest non-null
        value before it. Leading nulls stay null. The input's index and name
        are preserved, so the result lines up with its source, e.g. inside
        ``df.groupby(...)[var].transform(retain)``.
    """
    # Series.ffill is the vectorised form of "remember the last non-null
    # value"; unlike rebuilding a Series from a list, it keeps index and name.
    return pd.Series(series).ffill()
# Demo input: a series with gaps (None) between the known values.
test_series = pd.Series([1,1,2,None,None,4,4,None,None,None,None])
sas_style_retained = retain(test_series)
# Input and result side by side. As a bare expression this is only displayed
# in an interactive session; the expected display is shown below.
pd.DataFrame({'original':test_series,'retained':sas_style_retained})
# original retained
# 0 1.0 1.0
# 1 1.0 1.0
# 2 2.0 2.0
# 3 NaN 2.0
# 4 NaN 2.0
# 5 4.0 4.0
# 6 4.0 4.0
# 7 NaN 4.0
# 8 NaN 4.0
# 9 NaN 4.0
# 10 NaN 4.0
# use something like df.groupby(<id>)[var].transform(retain)
# for grouped data
I want to port some SAS code to Python, but I cannot find an equivalent of the SAS RETAIN statement in Python.
The data is like :
type_id amount
1 100
1 200
1 400
2 0
1 200
1 300
2 0
1 150
What I want is that, when type_id = 2, the amount is set to the negative of the previous row's amount. After applying the retain logic, the data should look like this:
type_id amount
1 100
1 200
1 400
2 -400
1 200
1 300
2 -300
1 150
The SAS code is :
/* Build data set B from A: every type_id = 2 row receives the negated
   amount of the most recent row whose type_id is not 2. */
data B;
set A;
/* RETAIN keeps tempvar across data step iterations; it starts at 0. */
retain tempvar 0;
/* type_id = 2: overwrite amount with the remembered value.
   Any other row: remember the negative of its amount for later. */
if type_id = 2
then amount = tempvar;
else tempvar = -amount;
/* tempvar is a working variable only; keep it out of B. */
drop tempvar;
run;
Does anyone have any idea about how to do this in python?
Thanks!
If I understand correctly (IIUC):
df
type_id amount
0 1 100
1 1 200
2 1 400
3 2 0
4 1 200
5 1 300
6 2 0
7 1 150
def retain(df):
    """Set ``amount`` on each ``type_id == 2`` row to minus the previous row's amount.

    This reproduces the SAS data step
    ``retain tempvar 0; if type_id = 2 then amount = tempvar; else tempvar = -amount;``
    for data in which every ``type_id == 2`` row directly follows a regular row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``type_id`` and ``amount`` columns. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    is_type_2 = df['type_id'] == 2
    # Negate, then shift down one row, so each row sees the negated amount of
    # the row above it. fill_value=0 stands in for SAS's initial tempvar of 0
    # when the very first row has type_id == 2, and avoids introducing NaN.
    negated_previous = (-df['amount']).shift(fill_value=0)
    # .loc replaces the .ix indexer (removed in pandas 1.0). Working from a
    # local Series also avoids adding and dropping a temporary column on df.
    df.loc[is_type_2, 'amount'] = negated_previous[is_type_2]
    return df
retain(df)
type_id amount
0 1 100.0
1 1 200.0
2 1 400.0
3 2 -400.0
4 1 200.0
5 1 300.0
6 2 -300.0
7 1 150.0
Alternatively:
def retain(df):
    """Compact variant: negate the previous row's amount on ``type_id == 2`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``type_id`` and ``amount`` columns. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    type_2_rows = df['type_id'] == 2
    # A single .loc assignment on the frame itself: the old chained form
    # (df.amount.ix[...] = ...) relied on the removed .ix indexer and is not
    # guaranteed to write back to df. fill_value=0 keeps a leading
    # type_id == 2 row at 0 (the SAS initial value) instead of NaN.
    df.loc[type_2_rows, 'amount'] = (-df['amount']).shift(fill_value=0)[type_2_rows]
    return df
retain(df)
type_id amount
0 1 100.0
1 1 200.0
2 1 400.0
3 2 -400.0
4 1 200.0
5 1 300.0
6 2 -300.0
7 1 150.0
# another idea to try
class retain_memo(object):
    """One-slot memory cell holding the most recently stored value."""

    def __init__(self):
        # Nothing has been stored yet.
        self.value = None

    def set_get(self, u):
        """Store ``u`` as the current value and return it."""
        self.value = u
        return self.value
def retain(series):
    """Carry the last non-null value forward over nulls (SAS-style retain).

    Parameters
    ----------
    series : pandas.Series or iterable
        Values to fill; anything that is not a Series is wrapped in one.

    Returns
    -------
    pandas.Series
        A new Series in which each null is replaced by the closest non-null
        value before it. Leading nulls stay null. The input's index and name
        are preserved, so the result lines up with its source, e.g. inside
        ``df.groupby(...)[var].transform(retain)``.
    """
    # Series.ffill is the vectorised form of "remember the last non-null
    # value"; unlike rebuilding a Series from a list, it keeps index and name.
    return pd.Series(series).ffill()
# Demo input: a series with gaps (None) between the known values.
test_series = pd.Series([1,1,2,None,None,4,4,None,None,None,None])
sas_style_retained = retain(test_series)
# Input and result side by side. As a bare expression this is only displayed
# in an interactive session; the expected display is shown below.
pd.DataFrame({'original':test_series,'retained':sas_style_retained})
# original retained
# 0 1.0 1.0
# 1 1.0 1.0
# 2 2.0 2.0
# 3 NaN 2.0
# 4 NaN 2.0
# 5 4.0 4.0
# 6 4.0 4.0
# 7 NaN 4.0
# 8 NaN 4.0
# 9 NaN 4.0
# 10 NaN 4.0
# use something like df.groupby(<id>)[var].transform(retain)
# for grouped data