clean_list() –> ValueError: Wrong number of items passed 3, placement implies 1

Question:

I inherited this code from a previous employee, and when I try to run it I get an error.

def replaceitem(x):
    """Collapse the listed CRM application names into one shared label.

    Returns 'CRM Application' when ``x`` equals one of the names below;
    any other value is handed back unchanged.
    """
    crm_names = ['ORION', 'ACTION', 'ICE', 'IRIS', 'FOCUS']
    return 'CRM Application' if x in crm_names else x
    
def clean_list(row):
    """Return the distinct application names of a row, in first-seen order.

    Every value in ``row['APLN_NM']`` is passed through ``replaceitem`` and
    duplicates are dropped, keeping the position of the first occurrence.

    Args:
        row: Any object supporting ``row['APLN_NM']`` (the callers pass
            DataFrame rows via ``apply(..., axis=1)``). The cell must be an
            iterable of hashable values.

    Returns:
        list: The mapped, de-duplicated names.

    Raises:
        KeyError: If ``row`` has no 'APLN_NM' entry.
        TypeError: If the cell is not iterable or holds unhashable values.
    """
    seen = set()
    distinct = []
    # One pass replaces the two sorted(set(...), key=list.index) rounds,
    # which were quadratic and needed the cell to provide .index().
    # Mapping before de-duplicating yields the same first-seen order.
    for name in row['APLN_NM']:
        name = replaceitem(name)
        if name not in seen:
            seen.add(name)
            distinct.append(name)
    return distinct

#*********************************************************************************************************************************************
# With axis=1, apply() hands each row of the frame to clean_list; the result
# is stored in a new 'APLN_NM_DISTINCT' column.
df_agg['APLN_NM_DISTINCT']        = df_agg.apply(clean_list, axis = 1)
# NOTE(review): the traceback below points at this statement; the error is
# reported to occur when df_agg_single has no rows, so the empty case needs
# a guard before assigning the apply() result to a single column.
df_agg_single['APLN_NM_DISTINCT'] = df_agg_single.apply(clean_list, axis = 1)

While running the code I got this error:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2890             try:
-> 2891                 return self._engine.get_loc(casted_key)
   2892             except KeyError as err:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'APLN_NM_DISTINCT'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   3570         try:
-> 3571             loc = self._info_axis.get_loc(key)
   3572         except KeyError:

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2892             except KeyError as err:
-> 2893                 raise KeyError(key) from err
   2894 

KeyError: 'APLN_NM_DISTINCT'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-71-e8b5e8d5b514> in <module>
    431 #*********************************************************************************************************************************************
    432 df_agg['APLN_NM_DISTINCT']        = df_agg.apply(clean_list, axis = 1)
--> 433 df_agg_single['APLN_NM_DISTINCT'] = df_agg_single.apply(clean_list, axis = 1)
    434 
    435 df_agg['TOTAL_HOLD_TIME']        = df_agg_single['TOTAL_HOLD_TIME'].astype(int)

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   3038         else:
   3039             # set column
-> 3040             self._set_item(key, value)
   3041 
   3042     def _setitem_slice(self, key: slice, value):

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   3115         self._ensure_valid_index(value)
   3116         value = self._sanitize_column(key, value)
-> 3117         NDFrame._set_item(self, key, value)
   3118 
   3119         # check if we are modifying a copy

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   3572         except KeyError:
   3573             # This item wasn't present, just insert at end
-> 3574             self._mgr.insert(len(self._info_axis), key, value)
   3575             return
   3576 

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
   1187             value = _safe_reshape(value, (1,) + value.shape)
   1188 
-> 1189         block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
   1190 
   1191         for blkno, count in _fast_count_smallints(self.blknos[loc:]):

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
   2717         values = DatetimeArray._simple_new(values, dtype=dtype)
   2718 
-> 2719     return klass(values, ndim=ndim, placement=placement)
   2720 
   2721 

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
   2373             values = np.array(values, dtype=object)
   2374 
-> 2375         super().__init__(values, ndim=ndim, placement=placement)
   2376 
   2377     @property

/opt/rh/rh-python36/root/usr/lib64/python3.6/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
    128         if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
    129             raise ValueError(
--> 130                 f"Wrong number of items passed {len(self.values)}, "
    131                 f"placement implies {len(self.mgr_locs)}"
    132             )

ValueError: Wrong number of items passed 3, placement implies 1

df_agg and df_agg_single are data frames with the same column names.

However, only df_agg contains data; df_agg_single is empty.

data in df_agg dataframe looks like this

data in df_agg_single dataframe looks like this

So I get this error whenever the clean_list function is applied to an empty data frame.

Asked By: AshishMulupuri

||

Answers:

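I found that the error occurs only when the data frame is empty: on a frame with no rows, `apply(clean_list, axis=1)` returns an empty DataFrame that still has all of the original columns instead of a Series, so the result cannot be assigned to a single column. Checking for an empty frame with if/else before calling apply fixed it.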

# Both frames get the same treatment: a frame with no rows receives a
# placeholder '' column, and apply() is only evaluated for a frame that
# actually holds rows.
for frame in (df_agg, df_agg_single):
    frame['APLN_NM_DISTINCT'] = (
        '' if frame.empty else frame.apply(clean_list, axis = 1)
    )
Answered By: AshishMulupuri