How can I generate n numbers in the interval [a,b] whose sum doesn't exceed k in Python?
Question:
Let me frame the question in a simple way:
I usually have about k=3000€ per month. This month had n=26 working days (in July as you see in the following picture), and generally, I work something between [100,120]€ each day.
Note: k may be off by +/- x€ if needed, but that deviation x should be as small as possible.
Here is what I tried in order to generate n numbers within the [a,b] interval; their sum should be very close to k:
import numpy as np
# Alternative (floats instead of integers): uniform draws from [100, 120).
#rng = np.random.default_rng(123)
#arr1 = rng.uniform(100, 120,26)
# np.random.randint excludes its upper bound, so pass 121 to make 120 itself
# a possible daily value, matching the stated closed interval [100, 120].
arr1 = np.random.randint(100, 121, 26)
# Example output from one run:
#array([107, 115, 116, 105, 104, 110, 110, 107, 116, 110, 101, 112, 109,
# 111, 118, 102, 108, 113, 101, 112, 111, 116, 111, 109, 110, 107])
# Nothing here constrains the sum, which is why it can land far from k=3000.
total = np.sum(arr1)
print(f'Sum of all the elements is {total}')
#Sum of all the elements is 2851
I don’t have a clue how to fulfil the condition. The sum of the generated random numbers should be close to k, i.e. within [k, k+i] with i as small as possible, e.g. [3000€, 3050€].
Edit1: I tried to compare the distribution quality of the generated values by plotting/fitting the solutions offered by @Murali & @btilly in the form of a PDF, as below:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Sort the samples in place so the curve is drawn left to right.
# `h` is only a second name for the same array object as `arr1`.
arr1.sort()
h = arr1
# Fit a normal distribution using the sample mean and np.std's default
# (population, ddof=0) standard deviation.
hmean, hstd = np.mean(h), np.std(h)
pdf = stats.norm.pdf(h, loc=hmean, scale=hstd)
#plt.hist(arr1)
# Pass h as the x values explicitly; otherwise the density would be plotted
# against the array index instead of the generated amounts.
plt.plot(h, pdf, '-o', alpha=0.4)
So clearly one has a skew, but the other is the normal distribution.
Answers:
Disclaimer:
As far as I know, there isn’t any built in way to do this. Below is a possible solution.
Possible Solution:
You could run it, then find the difference between 3,000 and the number that the program printed. You could split that by the number of days there are and add that number to all the days.
Example:
The sum is 2896, so subtract that from 3,000. This is 104, and divide that by the number of days (26) to get 4. Add 4 to all the numbers.
Important Notes:
You would have to double-check that the numbers aren’t above the maximum allowed. Also, if the number printed happened to be more than 3,000, you would have to do the same thing but with subtraction. In that case, you have to double-check that the numbers don’t fall below the minimum allowed.
You can use a Gaussian distribution for a given mean and standard deviation
# Per-day mean required for 26 days to total 3000 (about 115.38).
mu = 3000/26
sigma = 5 ## standard deviation, not a hard limit: roughly 68% of draws fall within mu +- 5, i.e. [110.4,120.4]
# NOTE(review): normal draws are unbounded floats, so individual values can
# fall outside [100, 120] and the sum is only close to 3000 on average --
# clip or redraw if the bounds must hold strictly.
arr1 = np.random.normal(mu, sigma, 26)
print(np.sum(arr1))
# Example output from one run:
# 3011.268333226019
You can also play with other distributions and see which fits your purpose.
This is yet another problem where the solution in Generating a random string with matched brackets applies.
import random
class DPPath:
    """Node of a path-counting structure used to sample paths uniformly.

    Each node records how many distinct paths lead to it (``count``) and,
    for every possible last step (``transition``), the node the step came
    from (``next``).  A node whose ``next`` is ``None`` is a base node: it
    ends the walk and contributes an empty path.
    """

    def __init__(self):
        # Number of distinct paths ending at this node.
        self.count = 0
        # Maps transition -> predecessor DPPath; None until an option is added.
        self.next = None

    def add_option(self, transition, tail):
        """Register ``transition`` as a way to reach this node from ``tail``.

        The node's path count grows by ``tail.count``.
        """
        if self.next is None:
            self.next = {}
        self.next[transition] = tail
        self.count += tail.count

    def random(self):
        """Return one path chosen uniformly at random, or None if there is none.

        The path is returned in the form produced by :meth:`find`.
        """
        if 0 == self.count:
            return None
        # random.randrange handles arbitrarily large Python ints and excludes
        # the upper bound, so the index is always in [0, count).  The earlier
        # hand-rolled helper built on inclusive randint could step one past
        # the end of a bucket and, rarely, past the last valid index.
        return self.find(random.randrange(self.count))

    def find(self, pos):
        """Return path number ``pos`` (0-based) without its final transition.

        The final transition of the top-level node is the "total sum" marker
        rather than a real step, so it is dropped.
        """
        result = self._find(pos)
        result.pop()  # Remove the "total sum transition"
        return result

    def _find(self, pos):
        """Return the transitions of path ``pos``, earliest step first.

        Raises:
            IndexError: if ``pos`` is not smaller than the number of paths.
        """
        if self.next is None:
            return []
        for transition, tail in self.next.items():
            if pos < tail.count:
                # The requested path goes through this option: resolve the
                # prefix recursively, then append our own step.
                result = tail._find(pos)
                result.append(transition)
                return result
            # Skip all paths that belong to this option.
            pos -= tail.count
        # pos has been reduced by every option's count, so add the total
        # back to report the index that was originally requested.
        raise IndexError(f"find item {pos + self.count} out of range for {self.count}")
def sum_options(days, min_n, max_n, min_total, max_total):
    """Build a DPPath covering every admissible sequence of daily amounts.

    A sequence has ``days`` integer entries, each in ``[min_n, max_n]``,
    and its sum lies in ``[min_total, max_total]`` (all bounds inclusive).
    Calling ``.random()`` on the returned node yields one such sequence,
    or ``None`` when no sequence satisfies the constraints.
    """
    # Zero days: exactly one (empty) sequence, with sum 0.
    start = DPPath()
    start.count = 1
    reachable = {0: start}

    for _ in range(days):
        following = {}
        for partial, paths in reachable.items():
            for amount in range(min_n, max_n + 1):
                new_sum = partial + amount
                node = following.get(new_sum)
                if node is None:
                    node = following[new_sum] = DPPath()
                node.add_option(amount, paths)
        reachable = following

    # Collect every reachable sum inside the requested range under one root;
    # the sum itself serves as the (later discarded) final transition.
    root = DPPath()
    for wanted in range(min_total, max_total + 1):
        node = reachable.get(wanted)
        if node is not None:
            root.add_option(wanted, node)
    return root
# Draw one random 26-day schedule with 100..120 per day summing to 3000..3050.
print(
    sum_options(
        days=26, min_n=100, max_n=120, min_total=3000, max_total=3050
    ).random()
)
Let me frame the question in a simple way:
I usually have about k=3000€ per month. This month had n=26 working days (in July as you see in the following picture), and generally, I work something between [100,120]€ each day.
Note: k may be off by +/- x€ if needed, but that deviation x should be as small as possible.
Here is what I tried in order to generate n numbers within the [a,b] interval; their sum should be very close to k:
import numpy as np
# Alternative (floats instead of integers): uniform draws from [100, 120).
#rng = np.random.default_rng(123)
#arr1 = rng.uniform(100, 120,26)
# np.random.randint excludes its upper bound, so pass 121 to make 120 itself
# a possible daily value, matching the stated closed interval [100, 120].
arr1 = np.random.randint(100, 121, 26)
# Example output from one run:
#array([107, 115, 116, 105, 104, 110, 110, 107, 116, 110, 101, 112, 109,
# 111, 118, 102, 108, 113, 101, 112, 111, 116, 111, 109, 110, 107])
# Nothing here constrains the sum, which is why it can land far from k=3000.
total = np.sum(arr1)
print(f'Sum of all the elements is {total}')
#Sum of all the elements is 2851
I don’t have a clue how to fulfil the condition. The sum of the generated random numbers should be close to k, i.e. within [k, k+i] with i as small as possible, e.g. [3000€, 3050€].
Edit1: I tried to compare the distribution quality of the generated values by plotting/fitting the solutions offered by @Murali & @btilly in the form of a PDF, as below:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Sort the samples in place so the curve is drawn left to right.
# `h` is only a second name for the same array object as `arr1`.
arr1.sort()
h = arr1
# Fit a normal distribution using the sample mean and np.std's default
# (population, ddof=0) standard deviation.
hmean, hstd = np.mean(h), np.std(h)
pdf = stats.norm.pdf(h, loc=hmean, scale=hstd)
#plt.hist(arr1)
# Pass h as the x values explicitly; otherwise the density would be plotted
# against the array index instead of the generated amounts.
plt.plot(h, pdf, '-o', alpha=0.4)
So clearly one has a skew, but the other is the normal distribution.
Disclaimer:
As far as I know, there isn’t any built in way to do this. Below is a possible solution.
Possible Solution:
You could run it, then find the difference between 3,000 and the number that the program printed. You could split that by the number of days there are and add that number to all the days.
Example:
The sum is 2896, so subtract that from 3,000. This is 104, and divide that by the number of days (26) to get 4. Add 4 to all the numbers.
Important Notes:
You would have to double-check that the numbers aren’t above the maximum allowed. Also, if the number printed happened to be more than 3,000, you would have to do the same thing but with subtraction. In that case, you have to double-check that the numbers don’t fall below the minimum allowed.
You can use a Gaussian distribution for a given mean and standard deviation
# Per-day mean required for 26 days to total 3000 (about 115.38).
mu = 3000/26
sigma = 5 ## standard deviation, not a hard limit: roughly 68% of draws fall within mu +- 5, i.e. [110.4,120.4]
# NOTE(review): normal draws are unbounded floats, so individual values can
# fall outside [100, 120] and the sum is only close to 3000 on average --
# clip or redraw if the bounds must hold strictly.
arr1 = np.random.normal(mu, sigma, 26)
print(np.sum(arr1))
# Example output from one run:
# 3011.268333226019
You can also play with other distributions and see which fits your purpose.
This is yet another problem where the solution in Generating a random string with matched brackets applies.
import random
class DPPath:
    """Node of a path-counting structure used to sample paths uniformly.

    Each node records how many distinct paths lead to it (``count``) and,
    for every possible last step (``transition``), the node the step came
    from (``next``).  A node whose ``next`` is ``None`` is a base node: it
    ends the walk and contributes an empty path.
    """

    def __init__(self):
        # Number of distinct paths ending at this node.
        self.count = 0
        # Maps transition -> predecessor DPPath; None until an option is added.
        self.next = None

    def add_option(self, transition, tail):
        """Register ``transition`` as a way to reach this node from ``tail``.

        The node's path count grows by ``tail.count``.
        """
        if self.next is None:
            self.next = {}
        self.next[transition] = tail
        self.count += tail.count

    def random(self):
        """Return one path chosen uniformly at random, or None if there is none.

        The path is returned in the form produced by :meth:`find`.
        """
        if 0 == self.count:
            return None
        # random.randrange handles arbitrarily large Python ints and excludes
        # the upper bound, so the index is always in [0, count).  The earlier
        # hand-rolled helper built on inclusive randint could step one past
        # the end of a bucket and, rarely, past the last valid index.
        return self.find(random.randrange(self.count))

    def find(self, pos):
        """Return path number ``pos`` (0-based) without its final transition.

        The final transition of the top-level node is the "total sum" marker
        rather than a real step, so it is dropped.
        """
        result = self._find(pos)
        result.pop()  # Remove the "total sum transition"
        return result

    def _find(self, pos):
        """Return the transitions of path ``pos``, earliest step first.

        Raises:
            IndexError: if ``pos`` is not smaller than the number of paths.
        """
        if self.next is None:
            return []
        for transition, tail in self.next.items():
            if pos < tail.count:
                # The requested path goes through this option: resolve the
                # prefix recursively, then append our own step.
                result = tail._find(pos)
                result.append(transition)
                return result
            # Skip all paths that belong to this option.
            pos -= tail.count
        # pos has been reduced by every option's count, so add the total
        # back to report the index that was originally requested.
        raise IndexError(f"find item {pos + self.count} out of range for {self.count}")
def sum_options(days, min_n, max_n, min_total, max_total):
    """Build a DPPath covering every admissible sequence of daily amounts.

    A sequence has ``days`` integer entries, each in ``[min_n, max_n]``,
    and its sum lies in ``[min_total, max_total]`` (all bounds inclusive).
    Calling ``.random()`` on the returned node yields one such sequence,
    or ``None`` when no sequence satisfies the constraints.
    """
    # Zero days: exactly one (empty) sequence, with sum 0.
    start = DPPath()
    start.count = 1
    reachable = {0: start}

    for _ in range(days):
        following = {}
        for partial, paths in reachable.items():
            for amount in range(min_n, max_n + 1):
                new_sum = partial + amount
                node = following.get(new_sum)
                if node is None:
                    node = following[new_sum] = DPPath()
                node.add_option(amount, paths)
        reachable = following

    # Collect every reachable sum inside the requested range under one root;
    # the sum itself serves as the (later discarded) final transition.
    root = DPPath()
    for wanted in range(min_total, max_total + 1):
        node = reachable.get(wanted)
        if node is not None:
            root.add_option(wanted, node)
    return root
# Draw one random 26-day schedule with 100..120 per day summing to 3000..3050.
print(
    sum_options(
        days=26, min_n=100, max_n=120, min_total=3000, max_total=3050
    ).random()
)