Finding minimum iterative

Question:

import pandas as pd

# Orders to be confirmed; 'order_date' is kept as ISO-formatted strings here.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})

# Free capacity per calendar day of February 2022 (28 days).
# The list must contain exactly one value per date, otherwise pd.DataFrame
# raises "All arrays must be of the same length". The last value (320) was
# added to cover 2022-02-28; it simply continues the +10 progression.
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 320],
})

I would like the output to look like this:

# Desired result: one confirmation date per order number (closing brace added
# so the literal is valid Python).
output = {'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-06', '2022-02-06', '2022-02-06']}

For each order, I just want to find the closest date where capacity is free and, if it is free, reduce that date's capacity by one before the next iteration.

This is my script:

 # Accumulators must be lists: appending to None raises AttributeError.
order_number = []
confirmation_date = []

grouped = sales_data['order_number'].unique()

# Every order takes one unit from the first capacity row (in row order) that
# still has free capacity.
for group in grouped:
    for order_row in capacity_data.index:
        if capacity_data.at[order_row, 'capacity'] > 0:
            order_number.append(group)
            confirmation_date.append(capacity_data.at[order_row, 'date'])
            # Write through .at: chained indexing such as
            # capacity_data['capacity'][order_row] = ... may modify a copy.
            capacity_data.at[order_row, 'capacity'] -= 1
            # Stop at the first free date; without this the order would
            # consume capacity on every remaining date as well.
            break

# Maps order number -> confirmation date. Orders for which no free capacity
# was found are absent from the mapping.
orderdict = dict(zip(order_number, confirmation_date))

I would also like to ask if there is a way to make this script more optimized for iterating over more than 100k rows.

Asked By: onhalu

||

Answers:

You can do it this way:

import pandas as pd
import numpy as np

# Orders to confirm; the order dates are parsed one by one inside the loop.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})

# Free capacity for each day of February 2022.
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})

confirmation_dates = []

for _, order in sales_data.iterrows():
    wanted = pd.to_datetime(order['order_date'])

    # Only days that still have free capacity are candidates.
    has_room = capacity_data['capacity'] > 0
    distance = (capacity_data.loc[has_room, 'date'] - wanted).abs()

    # Row label of the candidate day nearest to the order date
    # (the distance is absolute, so it may lie before or after it).
    slot = distance.idxmin()

    chosen_day = capacity_data.loc[slot, 'date']
    confirmation_dates.append(str(chosen_day.date()))

    # Consume one unit so the next order sees the reduced capacity.
    capacity_data.loc[slot, 'capacity'] -= 1

output = {
    'order_number': sales_data['order_number'].tolist(),
    'confirmation_date': confirmation_dates,
}

print(output)

which returns you output:

{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-07', '2022-02-10', '2022-02-14']}

Try using it on your full data. If it is too slow, use apply() in the following way:


import pandas as pd
import numpy as np

# Orders to confirm; 'order_date' holds ISO-formatted strings.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})

# Free capacity for each day of February 2022.
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})


def find_confirmation_date(row):
    """Return the free date closest to the row's order date and book it.

    The date is chosen among the rows of the module-level ``capacity_data``
    whose capacity is strictly positive, by absolute distance to
    ``row['order_date']``. The chosen row's capacity is decremented in place,
    so later calls see the reduced capacity.

    Args:
        row: A row of ``sales_data``; only ``row['order_date']`` is read.

    Returns:
        The chosen date formatted as ``'YYYY-MM-DD'``.

    Raises:
        ValueError: If no date has free capacity left (raised by ``idxmin``
            on the empty selection).
    """
    order_date = pd.to_datetime(row['order_date'])
    # Strictly positive: a date with capacity 0 is full and must be skipped.
    available_dates = capacity_data.loc[capacity_data['capacity'] > 0, 'date']
    # idxmin returns the row label, which stays valid after filtering
    # (unlike a positional argsort result looked up with [0]).
    slot = (available_dates - order_date).abs().idxmin()
    # Decrement the shared frame itself; decrementing a copy would be lost
    # as soon as the function returns.
    capacity_data.at[slot, 'capacity'] -= 1
    return capacity_data.at[slot, 'date'].strftime('%Y-%m-%d')


# Rows are processed in order, so earlier orders book capacity first.
output = sales_data.apply(find_confirmation_date, axis=1)
output_dict = {'order_number': sales_data['order_number'].to_list(), 'confirmation_date': output.tolist()}
# output_dict is now:
# {'order_number': [1001, 1002, 1003, 1004, 1005],
#  'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-07', '2022-02-10', '2022-02-14']}



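which gives you the result below. Note that every confirmation date simply equals the order date: this result comes from a version of the function that keeps rows with capacity >= 0 and decrements a copy of capacity_data, so capacity is effectively ignored.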

{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']}

I think you’re trying to confirm the order after the order arrives and capacity is available. You can follow this code to achieve that.

import pandas as pd

# Orders to confirm; the order dates are parsed once so they can be compared
# directly with the capacity dates.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
sales_data['order_date'] = pd.to_datetime(sales_data['order_date'])

# Free capacity for each day of February 2022, in ascending date order.
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 200],
})

order_number = []
confirmation_date = []

# Confirm each order on the first capacity row that is on or after the order
# date and still has free capacity.
for order in sales_data.itertuples(index=False):
    eligible = (capacity_data['date'] >= order.order_date) & (capacity_data['capacity'] > 0)
    if not eligible.any():
        # No free slot on or after the order date: the order is left out of
        # the result.
        continue

    # idxmax on a boolean Series returns the label of the first True row.
    slot = eligible.idxmax()

    order_number.append(order.order_number)
    confirmation_date.append(capacity_data.at[slot, 'date'])

    # Book one unit through the public .at accessor, addressing the column by
    # name rather than by position.
    capacity_data.at[slot, 'capacity'] -= 1

final_data = pd.DataFrame({
    'order_number': order_number,
    'confirm_date': confirmation_date,
})

print(final_data)
Answered By: Harsh Dobariya
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.