Subset sum problem for a possible closest value to the target sum in Python for a really big list

Question:

I know this problem may have been asked by many. My challenge is that my list is long in length (say 100+) and my target may also be big. The recursive algorithm takes 10 minutes to complete. I need any one subset that makes it closest to the target-sum

My Code

combination_dict = {}

def subset_sum(numbers, target, partial=[]):
    s = sum(partial)

    if s == target:
        print("sum({})={}".format(partial, target))
        return
    if s >= target:
        partial.pop()
        combination_dict[sum(partial)] = partial
        return 

    for i in range(len(numbers)):
        n = numbers[i]
        remaining = numbers[i + 1:]
        subset_sum(remaining, target, partial + [n])

if __name__ == "__main__":
lst = [100, 100, 100, 100, 100, 100, 100] #my actual list is really long len=100(max)
tgt = 434
subset_sum(lst, tgt)
closest_sum = max([i for i in combination_dict.keys()])
print("Closest possible sum={} and the combination is ={}".format(closest_sum, combination_dict[closest_sum]))

If my list length is > 20, it is stuck. I understand this has a time complexity of O(2^n)- but can someone please help me get this optimized to get my result in less than 30 seconds.
My list can have floats. And so be the target-sum.

Asked By: Raj Kumar Pandit

||

Answers:

You can use integer linear programming where each variable is a binary that corresponds to a number and represents whether that number is included in the result. It solves two problems, the first approaching the target value from below and the second from above and then takes the best solution. The following is an example implementation using PuLP:

import numpy as np
from pulp import LpMaximize, LpMinimize, LpProblem, lpSum, LpVariable


rng = np.random.default_rng(seed=0)
numbers = rng.integers(1, 10**6, size=10**4)
target = int(numbers.mean() * rng.normal(loc=1, scale=0.1))

indices = range(len(numbers))
variables = LpVariable.dicts("Choice", indices, cat="Binary")

leq_prob = LpProblem('leq', LpMaximize)
geq_prob = LpProblem('geq', LpMinimize)

leq_prob += lpSum([variables[i]*numbers[i] for i in indices]) <= target
leq_prob += lpSum([variables[i]*numbers[i] for i in indices])

geq_prob += lpSum([variables[i]*numbers[i] for i in indices]) >= target
geq_prob += lpSum([variables[i]*numbers[i] for i in indices])

leq_prob.solve()
leq_choices = [numbers[i] for i in indices if variables[i].value() == 1]

if sum(leq_choices) == target:
    solution = leq_choices
else:
    geq_prob.solve()
    geq_choices = [numbers[i] for i in indices if variables[i].value() == 1]

    solution = (
        leq_choices
        if target-sum(leq_choices) <= sum(geq_choices)-target
        else geq_choices
    )

print(f'Solution: {solution}')
print(f'Sum: {sum(solution)} (target: {target})')
Answered By: a_guest

Here’s what I came up with (in case you need a solution with the fewest numbers) thanks to @a_guest and the answer above:

def lp(numbers, target):

    numbers = [n for n in numbers if n <= target]
    indices = range(len(numbers))
    variables = LpVariable.dicts("Choice", indices, cat="Binary")

    leq_prob = LpProblem("leq", LpMaximize)
    geq_prob = LpProblem("geq", LpMinimize)

    leq_prob += lpSum([variables[i] * numbers[i] for i in indices]) <= target
    leq_prob += lpSum([variables[i] * numbers[i] for i in indices])

    geq_prob += lpSum([variables[i] * numbers[i] for i in indices]) >= target
    geq_prob += lpSum([variables[i] * numbers[i] for i in indices])

    leq_prob.solve(PULP_CBC_CMD(msg=False))
    leq_choices = [numbers[i] for i in indices if variables[i].value() == 1]

    if sum(leq_choices) == target:
        solution = leq_choices
    else:
        geq_prob.solve(PULP_CBC_CMD(msg=False))
        geq_choices = [numbers[i] for i in indices if variables[i].value() == 1]

        solution = (
            leq_choices
            if target - sum(leq_choices) <= sum(geq_choices) - target
            else geq_choices
        )

    # Stage 2
    num_prob = LpProblem("eq", LpMinimize)
    num_prob += lpSum([variables[i] * numbers[i] for i in indices]) == sum(solution)
    num_prob += lpSum(variables)
    num_prob.solve(PULP_CBC_CMD(msg=False))

    solution = [numbers[i] for i in indices if variables[i].value() == 1]

    print(f"Solution: {solution}")
    print(f"Sum: {sum(solution)} (target: {target})")
    return solution, target

Example:

numbers = [1, 99, 25, 55, 20]
target = 100
lp(numbers, target)

Gives:

Solution: [1, 99]
Sum: 100 (target: 100)
Answered By: cosmic_inquiry