Finding minimum iterative
Question:
# Sample orders: one row per order with the date it was placed.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})

# Free capacity per calendar day, starting 2022-02-01.
daily_capacity = [
    0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
    190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310,
]
# The calendar is sized from the capacity list.  A fixed 28-day range paired
# with these 27 values makes pd.DataFrame raise "All arrays must be of the
# same length".
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', periods=len(daily_capacity), freq='D'),
    'capacity': daily_capacity,
})
I would like the output to look like this:
# Desired result: each order mapped to the first date that still has capacity.
output = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-06', '2022-02-06', '2022-02-06'],
}
I just want to find the closest date on which capacity is free and, once a date is used, reduce its capacity by one before the next iteration.
This is my script:
# Assign every order to the earliest date that still has free capacity and
# consume one unit of that date's capacity per order.
#
# Reads sales_data (column 'order_number') and capacity_data (columns 'date'
# and 'capacity'), writes the reduced capacities back to capacity_data, and
# leaves the mapping order number -> confirmation date in orderdict.
order_number = []       # must be lists: calling .append on None always raises
confirmation_date = []
grouped = sales_data['order_number'].unique()

# Work on plain Python lists; per-cell DataFrame access inside a loop is what
# makes this slow on 100k+ rows.
dates = capacity_data['date'].tolist()
capacity_left = capacity_data['capacity'].tolist()

# Capacity only ever decreases, so a date that is exhausted stays exhausted.
# The cursor therefore never has to move backwards, and the whole run is a
# single pass over the orders plus a single pass over the dates.
slot = 0
for group in grouped:
    while slot < len(capacity_left) and capacity_left[slot] <= 0:
        slot += 1
    if slot == len(capacity_left):
        break  # no capacity anywhere: the remaining orders stay unconfirmed
    order_number.append(group)
    confirmation_date.append(dates[slot])
    capacity_left[slot] -= 1

# Write the consumed capacity back in one assignment instead of chained
# capacity_data['capacity'][i] = ... updates, which may not reach the frame.
capacity_data['capacity'] = capacity_left

orderdict = dict(zip(order_number, confirmation_date))
I would also like to ask whether there is a way to optimize this script for iterating over more than 100k rows.
Answers:
You can do it this way:
import pandas as pd
import numpy as np
# Sample orders and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})

# For each order, pick the date with free capacity that is closest to the
# order date (earlier or later), then consume one unit of that date.
confirmation_dates = []
# Parse all order dates once instead of once per loop iteration.
for order_date in pd.to_datetime(sales_data['order_date']):
    open_dates = capacity_data.loc[capacity_data['capacity'] > 0, 'date']
    if open_dates.empty:
        # idxmin() on an empty Series fails with an unhelpful message.
        raise ValueError(f"no capacity left for order dated {order_date.date()}")
    # idxmin returns the row label of the smallest distance; on a tie the
    # first (earliest) date wins.
    slot = (open_dates - order_date).abs().idxmin()
    confirmation_dates.append(str(capacity_data.at[slot, 'date'].date()))
    capacity_data.at[slot, 'capacity'] -= 1

output = {
    'order_number': sales_data['order_number'].tolist(),
    'confirmation_date': confirmation_dates,
}
print(output)
which returns you output:
{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-07', '2022-02-10', '2022-02-14']}
Try using it on your full data. If it is too slow, use apply()
in the following way:
import pandas as pd
import numpy as np
# Sample orders and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})


def find_confirmation_date(row):
    """Return the free date closest to the row's order date and book it.

    Reads ``row['order_date']``, looks only at dates in the module-level
    ``capacity_data`` whose capacity is strictly positive, and decrements
    the chosen date's capacity in ``capacity_data`` itself so that later
    rows see the booking.

    Returns the chosen date formatted as ``YYYY-MM-DD``.
    Raises ``ValueError`` when no date has capacity left.
    """
    order_date = pd.to_datetime(row['order_date'])
    # Strictly positive: a date with capacity 0 is full, not available.
    has_capacity = capacity_data['capacity'] > 0
    if not has_capacity.any():
        raise ValueError(f"no capacity left for order dated {order_date.date()}")
    distance = (capacity_data.loc[has_capacity, 'date'] - order_date).abs()
    # idxmin gives the row *label*, which stays valid after filtering.
    # Indexing an argsort() result with [0] is a label lookup and breaks as
    # soon as row 0 is filtered out.
    closest_idx = distance.idxmin()
    # Update the shared frame: decrementing a per-call copy would throw the
    # booking away and every order would see the full, untouched capacity.
    capacity_data.at[closest_idx, 'capacity'] -= 1
    return capacity_data.at[closest_idx, 'date'].strftime('%Y-%m-%d')


# The function mutates capacity_data, so this relies on apply(axis=1) calling
# it once per row, in row order.
output = sales_data.apply(find_confirmation_date, axis=1)
output_dict = {
    'order_number': sales_data['order_number'].to_list(),
    'confirmation_date': output.tolist(),
}
# NOTE: with bookings persisted and full days excluded, the sample data gives
# confirmation dates 2022-02-04, 2022-02-05, 2022-02-07, 2022-02-10 and
# 2022-02-14.  The result quoted after this snippet (every order confirmed on
# its own order date) came from the earlier version, which never consumed
# capacity.
which gives you
{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']}
I think you’re trying to confirm the order after the order arrives and capacity is available. You can follow this code to achieve that.
# Sample orders (order dates parsed to timestamps so they compare against the
# calendar) and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
sales_data['order_date'] = pd.to_datetime(sales_data['order_date'])
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 200],
})

order_number = []
confirmation_date = []
# Confirm each order on the first calendar row that is on or after its order
# date and still has capacity, then consume one unit of that row.
for order in sales_data.itertuples(index=False):
    # Columns are addressed by name.  Positional row[0] / row[1] on a
    # label-indexed Series is deprecated in pandas.
    candidates = capacity_data.loc[
        (capacity_data['date'] >= order.order_date) & (capacity_data['capacity'] > 0)
    ]
    if candidates.empty:
        continue  # no slot on or after the order date: the order is left out
    slot = candidates.index[0]
    order_number.append(order.order_number)
    confirmation_date.append(capacity_data.at[slot, 'date'])
    # Public .at accessor instead of the private DataFrame._set_value.
    capacity_data.at[slot, 'capacity'] -= 1

final_data = pd.DataFrame({
    'order_number': order_number,
    'confirm_date': confirmation_date,
})
print(final_data)
# Sample orders: one row per order with the date it was placed.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})

# Free capacity per calendar day, starting 2022-02-01.
daily_capacity = [
    0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
    190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310,
]
# The calendar is sized from the capacity list.  A fixed 28-day range paired
# with these 27 values makes pd.DataFrame raise "All arrays must be of the
# same length".
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', periods=len(daily_capacity), freq='D'),
    'capacity': daily_capacity,
})
I would like the output to look like this:
# Desired result: each order mapped to the first date that still has capacity.
output = {
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-06', '2022-02-06', '2022-02-06'],
}
I just want to find the closest date on which capacity is free and, once a date is used, reduce its capacity by one before the next iteration.
This is my script:
# Assign every order to the earliest date that still has free capacity and
# consume one unit of that date's capacity per order.
#
# Reads sales_data (column 'order_number') and capacity_data (columns 'date'
# and 'capacity'), writes the reduced capacities back to capacity_data, and
# leaves the mapping order number -> confirmation date in orderdict.
order_number = []       # must be lists: calling .append on None always raises
confirmation_date = []
grouped = sales_data['order_number'].unique()

# Work on plain Python lists; per-cell DataFrame access inside a loop is what
# makes this slow on 100k+ rows.
dates = capacity_data['date'].tolist()
capacity_left = capacity_data['capacity'].tolist()

# Capacity only ever decreases, so a date that is exhausted stays exhausted.
# The cursor therefore never has to move backwards, and the whole run is a
# single pass over the orders plus a single pass over the dates.
slot = 0
for group in grouped:
    while slot < len(capacity_left) and capacity_left[slot] <= 0:
        slot += 1
    if slot == len(capacity_left):
        break  # no capacity anywhere: the remaining orders stay unconfirmed
    order_number.append(group)
    confirmation_date.append(dates[slot])
    capacity_left[slot] -= 1

# Write the consumed capacity back in one assignment instead of chained
# capacity_data['capacity'][i] = ... updates, which may not reach the frame.
capacity_data['capacity'] = capacity_left

orderdict = dict(zip(order_number, confirmation_date))
I would also like to ask whether there is a way to optimize this script for iterating over more than 100k rows.
You can do it this way:
import pandas as pd
import numpy as np
# Sample orders and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})

# For each order, pick the date with free capacity that is closest to the
# order date (earlier or later), then consume one unit of that date.
confirmation_dates = []
# Parse all order dates once instead of once per loop iteration.
for order_date in pd.to_datetime(sales_data['order_date']):
    open_dates = capacity_data.loc[capacity_data['capacity'] > 0, 'date']
    if open_dates.empty:
        # idxmin() on an empty Series fails with an unhelpful message.
        raise ValueError(f"no capacity left for order dated {order_date.date()}")
    # idxmin returns the row label of the smallest distance; on a tie the
    # first (earliest) date wins.
    slot = (open_dates - order_date).abs().idxmin()
    confirmation_dates.append(str(capacity_data.at[slot, 'date'].date()))
    capacity_data.at[slot, 'capacity'] -= 1

output = {
    'order_number': sales_data['order_number'].tolist(),
    'confirmation_date': confirmation_dates,
}
print(output)
which returns you output:
{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-04', '2022-02-05', '2022-02-07', '2022-02-10', '2022-02-14']}
Try using it on your full data. If it is too slow, use apply()
in the following way:
import pandas as pd
import numpy as np
# Sample orders and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 303],
})


def find_confirmation_date(row):
    """Return the free date closest to the row's order date and book it.

    Reads ``row['order_date']``, looks only at dates in the module-level
    ``capacity_data`` whose capacity is strictly positive, and decrements
    the chosen date's capacity in ``capacity_data`` itself so that later
    rows see the booking.

    Returns the chosen date formatted as ``YYYY-MM-DD``.
    Raises ``ValueError`` when no date has capacity left.
    """
    order_date = pd.to_datetime(row['order_date'])
    # Strictly positive: a date with capacity 0 is full, not available.
    has_capacity = capacity_data['capacity'] > 0
    if not has_capacity.any():
        raise ValueError(f"no capacity left for order dated {order_date.date()}")
    distance = (capacity_data.loc[has_capacity, 'date'] - order_date).abs()
    # idxmin gives the row *label*, which stays valid after filtering.
    # Indexing an argsort() result with [0] is a label lookup and breaks as
    # soon as row 0 is filtered out.
    closest_idx = distance.idxmin()
    # Update the shared frame: decrementing a per-call copy would throw the
    # booking away and every order would see the full, untouched capacity.
    capacity_data.at[closest_idx, 'capacity'] -= 1
    return capacity_data.at[closest_idx, 'date'].strftime('%Y-%m-%d')


# The function mutates capacity_data, so this relies on apply(axis=1) calling
# it once per row, in row order.
output = sales_data.apply(find_confirmation_date, axis=1)
output_dict = {
    'order_number': sales_data['order_number'].to_list(),
    'confirmation_date': output.tolist(),
}
# NOTE: with bookings persisted and full days excluded, the sample data gives
# confirmation dates 2022-02-04, 2022-02-05, 2022-02-07, 2022-02-10 and
# 2022-02-14.  The result quoted after this snippet (every order confirmed on
# its own order date) came from the earlier version, which never consumed
# capacity.
which gives you
{'order_number': [1001, 1002, 1003, 1004, 1005], 'confirmation_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14']}
I think you’re trying to confirm the order after the order arrives and capacity is available. You can follow this code to achieve that.
# Sample orders (order dates parsed to timestamps so they compare against the
# calendar) and a 28-day capacity calendar.
sales_data = pd.DataFrame({
    'order_number': [1001, 1002, 1003, 1004, 1005],
    'order_date': ['2022-02-01', '2022-02-03', '2022-02-07', '2022-02-10', '2022-02-14'],
})
sales_data['order_date'] = pd.to_datetime(sales_data['order_date'])
capacity_data = pd.DataFrame({
    'date': pd.date_range(start='2022-02-01', end='2022-02-28', freq='D'),
    'capacity': [0, 0, 0, 1, 1, 100, 110, 120, 130, 140, 150, 160, 170, 180,
                 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 200],
})

order_number = []
confirmation_date = []
# Confirm each order on the first calendar row that is on or after its order
# date and still has capacity, then consume one unit of that row.
for order in sales_data.itertuples(index=False):
    # Columns are addressed by name.  Positional row[0] / row[1] on a
    # label-indexed Series is deprecated in pandas.
    candidates = capacity_data.loc[
        (capacity_data['date'] >= order.order_date) & (capacity_data['capacity'] > 0)
    ]
    if candidates.empty:
        continue  # no slot on or after the order date: the order is left out
    slot = candidates.index[0]
    order_number.append(order.order_number)
    confirmation_date.append(capacity_data.at[slot, 'date'])
    # Public .at accessor instead of the private DataFrame._set_value.
    capacity_data.at[slot, 'capacity'] -= 1

final_data = pd.DataFrame({
    'order_number': order_number,
    'confirm_date': confirmation_date,
})
print(final_data)