apply different function on each row
Question:
I have a DataFrame with two columns, `field` and `value` (at most 10 rows). I need to perform a check that depends on the field (i.e. apply a different function to each row) and store the result in a `status` column. Below is a sample:
# Sample input: one row per field, each with the value to validate.
data = {'field': ['a', 'b'], 'value': [5, 20]}
df = pd.DataFrame(data)
# Single print call; the newline separator yields the same two output lines.
print('Initial DF', f"{df=}", sep='\n')
def _check_field_a(value):
    """Return True when *value* lies in the inclusive range 1..10."""
    _min = 1
    _max = 10
    # bool() keeps the result a plain Python bool even if the chained
    # comparison yields a NumPy boolean scalar.
    return bool(_min <= value <= _max)
def _check_field_b(value):
    """Return True when *value* is one of the allowed values 10, 20, 30, 40."""
    values = [10, 20, 30, 40]
    # `in` already evaluates to a bool, so no if/else is needed.
    return value in values
# Checkers are stored positionally: entry i is used for the row labelled i.
func = [_check_field_a, _check_field_b]


def _run_check(row):
    """Apply the checker whose list position equals the row's index label."""
    # row.name is the row's index label, so this relies on the frame's
    # 0..n-1 labels lining up with the order of `func`.
    checker = func[row.name]
    return checker(row['value'])


df['status'] = df.apply(_run_check, axis=1)
print('After check DF', f'{df=}', sep='\n')
output
Initial DF
df= field value
0 a 5
1 b 20
After check DF
df= field value status
0 a 5 True
1 b 20 True
The code above works; I am just wondering whether there is a better way to achieve the same result.
Edit-1
Inspired by the answers below, I have modified the code, but it currently does not work.
# Sample input: three fields, one value each.
data = {
'field': ['a', 'b', 'c'],
'value': [5, 20, 80],
}
df = pd.DataFrame(data)
print('Initial DF')
print(f"{df=}")
# Per-field rule table: 'check_type' names the check to run and 'values'
# holds the data for that check.
conditions = {
'a': {'values': (1, 10), 'check_type': 'between'},
'b': {'values': [10, 20, 30, 40], 'check_type': 'isin'},
'c': {'values': (50, 100), 'check_type': 'between'},
}
# Every row starts out as failed.
df['status'] = False
# NOTE(review): as posted, the bodies of the for/if/elif below are not
# indented, so the block structure of this loop has been lost.
for key, condition in conditions.items():
if condition['check_type'] == 'between':
# NOTE(review): df.loc[mask] selects whole rows rather than the 'value'
# column, between() receives the bounds as one tuple argument, and the
# result is assigned to the entire 'status' column on every pass.
# Presumably the intent was
#   df.loc[mask, 'status'] = df.loc[mask, 'value'].between(*condition['values'])
# -- confirm against the pandas documentation.
df['status'] = df.loc[df['field'] == key].between(condition['values'])
elif condition['check_type'] == 'isin':
# NOTE(review): same row-selection and whole-column assignment as above;
# presumably only the 'value' column of the matching rows should be tested.
df['status'] = df.loc[df['field'] == key].isin(condition['values'])
print('After check DF')
print(f'{df=}')
Edit-2
Using `np.select`. This works, and it makes it easy to add as many fields as needed.
# Sample input: three fields, one value each.
data = {'field': ['a', 'b', 'c'], 'value': [5, 20, 80]}
df = pd.DataFrame(data)
print('Initial DF', f"{df=}", sep='\n')

# One (row selector, check outcome) pair per field; np.select takes the
# outcome of the first selector that matches a row and falls back to False.
field, value = df['field'], df['value']
rules = [
    (field == 'a', value.between(1, 10)),
    (field == 'b', value.isin([10, 20, 30, 40])),
    (field == 'c', value.between(50, 100)),
]
condlist = [selector for selector, _ in rules]
choicelist = [outcome for _, outcome in rules]
df['status'] = np.select(condlist, choicelist, False)
print('After check DF', f'{df=}', sep='\n')
Answers:
You can also use a dictionary that maps each field to its check function:
import pandas as pd
# Sample input: one row per field, each with the value to validate.
data = {'field': ['a', 'b'], 'value': [5, 20]}
df = pd.DataFrame(data)
# Single print call; the newline separator yields the same output lines.
print('Initial DF', df, sep='\n')
def check_field_a(value):
    """Return whether *value* lies in the inclusive range 1..10."""
    return 1 <= value <= 10
def check_field_b(value):
    """Return whether *value* is one of the allowed values 10, 20, 30, 40."""
    return value in [10, 20, 30, 40]
# Map fields to their respective functions
check_functions = {'a': check_field_a, 'b': check_field_b}
# Row-wise dispatch: look up the checker by the row's field and call it
# with the row's value. A field without an entry raises KeyError.
df['status'] = df.apply(lambda row: check_functions[row['field']](row['value']), axis=1)
# The leading newline (its backslash was missing) separates this report
# from the previous one.
print('\nAfter check DF')
print(df)
For your two specific functions, you can vectorize them:
# Rows for field 'a' take the range check; all other rows take the
# membership check.
is_field_a = df['field'] == 'a'
in_a_range = df['value'].between(1, 10)  # change min,max here if needed
in_b_values = df['value'].isin([10, 20, 30, 40])
df['output'] = np.where(is_field_a, in_a_range, in_b_values)
You can use np.where() with multiple conditions.
a_min = 1
a_max = 10
b_vals = [10, 20, 30, 40]
# A row passes when it belongs to a field and satisfies that field's check.
a_passes = (df['field'] == 'a') & df['value'].between(a_min, a_max)
b_passes = (df['field'] == 'b') & df['value'].isin(b_vals)
df['status'] = np.where(a_passes | b_passes, True, False)
I have a DataFrame with two columns, `field` and `value` (at most 10 rows). I need to perform a check that depends on the field (i.e. apply a different function to each row) and store the result in a `status` column. Below is a sample:
# Sample input: one row per field, each with the value to validate.
data = {'field': ['a', 'b'], 'value': [5, 20]}
df = pd.DataFrame(data)
# Single print call; the newline separator yields the same two output lines.
print('Initial DF', f"{df=}", sep='\n')
def _check_field_a(value):
    """Return True when *value* lies in the inclusive range 1..10."""
    _min = 1
    _max = 10
    # bool() keeps the result a plain Python bool even if the chained
    # comparison yields a NumPy boolean scalar.
    return bool(_min <= value <= _max)
def _check_field_b(value):
    """Return True when *value* is one of the allowed values 10, 20, 30, 40."""
    values = [10, 20, 30, 40]
    # `in` already evaluates to a bool, so no if/else is needed.
    return value in values
# Checkers are stored positionally: entry i is used for the row labelled i.
func = [_check_field_a, _check_field_b]


def _run_check(row):
    """Apply the checker whose list position equals the row's index label."""
    # row.name is the row's index label, so this relies on the frame's
    # 0..n-1 labels lining up with the order of `func`.
    checker = func[row.name]
    return checker(row['value'])


df['status'] = df.apply(_run_check, axis=1)
print('After check DF', f'{df=}', sep='\n')
output
Initial DF
df= field value
0 a 5
1 b 20
After check DF
df= field value status
0 a 5 True
1 b 20 True
The code above works; I am just wondering whether there is a better way to achieve the same result.
Edit-1
Inspired by the answers below, I have modified the code, but it currently does not work.
# Sample input: three fields, one value each.
data = {
'field': ['a', 'b', 'c'],
'value': [5, 20, 80],
}
df = pd.DataFrame(data)
print('Initial DF')
print(f"{df=}")
# Per-field rule table: 'check_type' names the check to run and 'values'
# holds the data for that check.
conditions = {
'a': {'values': (1, 10), 'check_type': 'between'},
'b': {'values': [10, 20, 30, 40], 'check_type': 'isin'},
'c': {'values': (50, 100), 'check_type': 'between'},
}
# Every row starts out as failed.
df['status'] = False
# NOTE(review): as posted, the bodies of the for/if/elif below are not
# indented, so the block structure of this loop has been lost.
for key, condition in conditions.items():
if condition['check_type'] == 'between':
# NOTE(review): df.loc[mask] selects whole rows rather than the 'value'
# column, between() receives the bounds as one tuple argument, and the
# result is assigned to the entire 'status' column on every pass.
# Presumably the intent was
#   df.loc[mask, 'status'] = df.loc[mask, 'value'].between(*condition['values'])
# -- confirm against the pandas documentation.
df['status'] = df.loc[df['field'] == key].between(condition['values'])
elif condition['check_type'] == 'isin':
# NOTE(review): same row-selection and whole-column assignment as above;
# presumably only the 'value' column of the matching rows should be tested.
df['status'] = df.loc[df['field'] == key].isin(condition['values'])
print('After check DF')
print(f'{df=}')
Edit-2
Using `np.select`. This works, and it makes it easy to add as many fields as needed.
# Sample input: three fields, one value each.
data = {'field': ['a', 'b', 'c'], 'value': [5, 20, 80]}
df = pd.DataFrame(data)
print('Initial DF', f"{df=}", sep='\n')

# One (row selector, check outcome) pair per field; np.select takes the
# outcome of the first selector that matches a row and falls back to False.
field, value = df['field'], df['value']
rules = [
    (field == 'a', value.between(1, 10)),
    (field == 'b', value.isin([10, 20, 30, 40])),
    (field == 'c', value.between(50, 100)),
]
condlist = [selector for selector, _ in rules]
choicelist = [outcome for _, outcome in rules]
df['status'] = np.select(condlist, choicelist, False)
print('After check DF', f'{df=}', sep='\n')
You can also use a dictionary that maps each field to its check function:
import pandas as pd
# Sample input: one row per field, each with the value to validate.
data = {'field': ['a', 'b'], 'value': [5, 20]}
df = pd.DataFrame(data)
# Single print call; the newline separator yields the same output lines.
print('Initial DF', df, sep='\n')
def check_field_a(value):
    """Return whether *value* lies in the inclusive range 1..10."""
    return 1 <= value <= 10
def check_field_b(value):
    """Return whether *value* is one of the allowed values 10, 20, 30, 40."""
    return value in [10, 20, 30, 40]
# Map fields to their respective functions
check_functions = {'a': check_field_a, 'b': check_field_b}
# Row-wise dispatch: look up the checker by the row's field and call it
# with the row's value. A field without an entry raises KeyError.
df['status'] = df.apply(lambda row: check_functions[row['field']](row['value']), axis=1)
# The leading newline (its backslash was missing) separates this report
# from the previous one.
print('\nAfter check DF')
print(df)
For your two specific functions, you can vectorize them:
# Rows for field 'a' take the range check; all other rows take the
# membership check.
is_field_a = df['field'] == 'a'
in_a_range = df['value'].between(1, 10)  # change min,max here if needed
in_b_values = df['value'].isin([10, 20, 30, 40])
df['output'] = np.where(is_field_a, in_a_range, in_b_values)
You can use np.where() with multiple conditions.
a_min = 1
a_max = 10
b_vals = [10, 20, 30, 40]
# A row passes when it belongs to a field and satisfies that field's check.
a_passes = (df['field'] == 'a') & df['value'].between(a_min, a_max)
b_passes = (df['field'] == 'b') & df['value'].isin(b_vals)
df['status'] = np.where(a_passes | b_passes, True, False)