Python given an array A of N integers, returns the smallest positive integer (greater than 0) that does not occur in A in O(n) time complexity
Question:
For example:
input: A = [ 6 4 3 -5 0 2 -7 1 ]
output: 5
Since 5 is the smallest positive integer that does not occur in the array.
I have written two solutions to that problem. The first one is good but I don’t want to use any external libraries + its O(n)*log(n) complexity. The second solution "In which I need your help to optimize it" gives an error when the input is chaotic sequences length=10005 (with minus).
Solution 1:
from itertools import count, filterfalse
def minpositive(a):
return(next(filterfalse(set(a).__contains__, count(1))))
Solution 2:
def minpositive(a):
count = 0
b = list(set([i for i in a if i>0]))
if min(b, default = 0) > 1 or min(b, default = 0) == 0 :
min_val = 1
else:
min_val = min([b[i-1]+1 for i, x in enumerate(b) if x - b[i - 1] >1], default=b[-1]+1)
return min_val
Note: This was a demo test in codility, solution 1 got 100% and
solution 2 got 77 %.
Error in "solution2" was due to:
Performance tests ->
medium chaotic sequences length=10005 (with minus) got 3 expected
10000
Performance tests -> large chaotic + many -1, 1, 2, 3 (with
minus) got 5 expected 10000
Answers:
Testing for the presence of a number in a set is fast in Python so you could try something like this:
def minpositive(a):
A = set(a)
ans = 1
while ans in A:
ans += 1
return ans
def solution(A):
B = set(sorted(A))
m = 1
for x in B:
if x == m:
m+=1
return m
Fast for large arrays.
def minpositive(arr):
if 1 not in arr: # protection from error if ( max(arr) < 0 )
return 1
else:
maxArr = max(arr) # find max element in 'arr'
c1 = set(range(2, maxArr+2)) # create array from 2 to max
c2 = c1 - set(arr) # find all positive elements outside the array
return min(c2)
I just modified the answer by @najeeb-jebreel and now the function gives an optimal solution.
def solution(A):
sorted_set = set(sorted(A))
sol = 1
for x in sorted_set:
if x == sol:
sol += 1
else:
break
return sol
def minpositive(A):
"""Given an list A of N integers,
returns the smallest positive integer (greater than 0)
that does not occur in A in O(n) time complexity
Args:
A: list of integers
Returns:
integer: smallest positive integer
e.g:
A = [1,2,3]
smallest_positive_int = 4
"""
len_nrs_list = len(A)
N = set(range(1, len_nrs_list+2))
return min(N-set(A)) #gets the min value using the N integers
Continuing on from Niroj Shrestha and najeeb-jebreel, added an initial portion to avoid iteration in case of a complete set. Especially important if the array is very large.
def smallest_positive_int(A):
sorted_A = sorted(A)
last_in_sorted_A = sorted_A[-1]
#check if straight continuous list
if len(sorted_A) == last_in_sorted_A:
return last_in_sorted_A + 1
else:
#incomplete list, iterate to find the smallest missing number
sol=1
for x in sorted_A:
if x == sol:
sol += 1
else:
break
return sol
A = [1,2,7,4,5,6]
print(smallest_positive_int(A))
def solution(A):
arr = set(A)
N = set(range(1, 100001))
while N in arr:
N += 1
return min(N - arr)
solution([1, 2, 6, 4])
#returns 3
I reduced the length of set before comparing
a=[1,222,3,4,24,5,6,7,8,9,10,15,2,3,3,11,-1]
#a=[1,2,3,6,3]
def sol(a_array):
a_set=set()
b_set=set()
cnt=1
for i in a_array:
#In order to get the greater performance
#Checking if element is greater than length+1
#then it can't be output( our result in solution)
if i<=len(a) and i >=1:
a_set.add(i) # Adding array element in set
b_set.add(cnt) # Adding iterator in set
cnt=cnt+1
b_set=b_set.difference(a_set)
if((len(b_set)) > 1):
return(min(b_set))
else:
return max(a_set)+1
sol(a)
def solution(A):
nw_A = sorted(set(A))
if all(i < 0 for i in nw_A):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
For better performance if there is a possibility to import numpy package.
def solution(A):
import numpy as np
nw_A = np.unique(np.array(A))
if np.all((nw_A < 0)):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
This question doesn’t really need another answer, but there is a solution that has not been proposed yet, that I believe to be faster than what’s been presented so far.
As others have pointed out, we know the answer lies in the range [1, len(A)+1]
, inclusively. We can turn that into a set and take the minimum element in the set difference with A. That’s a good O(N) solution since set operations are O(1).
However, we don’t need to use a Python set to store [1, len(A)+1]
, because we’re starting with a dense set. We can use an array instead, which will replace set hashing by list indexing and give us another O(N) solution with a lower constant.
def minpositive(a):
# the "set" of possible answer - values_found[i-1] will tell us whether i is in a
values_found = [False] * (len(a)+1)
# note any values in a in the range [1, len(a)+1] as found
for i in a:
if i > 0 and i <= len(a)+1:
values_found[i-1] = True
# extract the smallest value not found
for i, found in enumerate(values_found):
if not found:
return i+1
We know the final for loop always finds a value that was not marked, because it has one more element than a
, so at least one of its cells was not set to True
.
def check_min(a):
x= max(a)
if x-1 in a:
return x+1
elif x <= 0:
return 1
else:
return x-1
Correct me if i’m wrong but this works for me.
def solution(A):
clone = 1
A.sort()
for itr in range(max(A) + 2):
if itr not in A and itr >= 1:
clone = itr
break
return clone
print(solution([2,1,4,7]))
#returns 3
This solution passes the performance test with a score of 100%
def solution(A):
n = sorted(i for i in set(A) if i > 0) # Remove duplicates and negative numbers
if not n:
return 1
ln = len(n)
for i in range(1, ln + 1):
if i != n[i - 1]:
return i
return ln + 1
def solution(A):
# write your code in Python 3.6
min_num = float("inf")
set_A = set(A)
# finding the smallest number
for num in set_A:
if num < min_num:
min_num = num
# print(min_num)
#if negative make positive
if min_num < 0 or min_num == 0:
min_num = 1
# print(min_num)
# if in set add 1 until not
while min_num in set_A:
min_num += 1
return min_num
Not sure why this is not 100% in correctness. It is 100% performance
I have an easy solution. No need to sort.
def solution(A):
s = set(A)
m = max(A) + 2
for N in range(1, m):
if N not in s:
return N
return 1
Note: It is 100% total score (Correctness & Performance)
def solution(A):
n = 1
for i in A:
if n in A:
n = n+1
else:
return n
return n
def not_in_A(a):
a=sorted(a)
if max(a)<1:
return(1)
for i in range(0,len(a)-1):
if a[i+1]-a[i]>1:
out=a[i]+1
if out==0 or out<1:
continue
return(out)
return(max(a)+1)
mark and then find the first one that didn’t find
nums = [ 6, 4, 3, -5, 0, 2, -7, 1 ]
def check_min(nums):
marks = [-1] * len(nums)
for idx, num in enumerate(nums):
if num >= 0:
marks[num] = idx
for idx, mark in enumerate(marks):
if mark == -1:
return idx
return idx + 1
For example:
input: A = [ 6 4 3 -5 0 2 -7 1 ]
output: 5
Since 5 is the smallest positive integer that does not occur in the array.
I have written two solutions to that problem. The first one is good but I don’t want to use any external libraries + its O(n)*log(n) complexity. The second solution "In which I need your help to optimize it" gives an error when the input is chaotic sequences length=10005 (with minus).
Solution 1:
from itertools import count, filterfalse
def minpositive(a):
return(next(filterfalse(set(a).__contains__, count(1))))
Solution 2:
def minpositive(a):
count = 0
b = list(set([i for i in a if i>0]))
if min(b, default = 0) > 1 or min(b, default = 0) == 0 :
min_val = 1
else:
min_val = min([b[i-1]+1 for i, x in enumerate(b) if x - b[i - 1] >1], default=b[-1]+1)
return min_val
Note: This was a demo test in codility, solution 1 got 100% and
solution 2 got 77 %.
Error in "solution2" was due to:
Performance tests ->
medium chaotic sequences length=10005 (with minus) got 3 expected
10000
Performance tests -> large chaotic + many -1, 1, 2, 3 (with
minus) got 5 expected 10000
Testing for the presence of a number in a set is fast in Python so you could try something like this:
def minpositive(a):
A = set(a)
ans = 1
while ans in A:
ans += 1
return ans
def solution(A):
B = set(sorted(A))
m = 1
for x in B:
if x == m:
m+=1
return m
Fast for large arrays.
def minpositive(arr):
if 1 not in arr: # protection from error if ( max(arr) < 0 )
return 1
else:
maxArr = max(arr) # find max element in 'arr'
c1 = set(range(2, maxArr+2)) # create array from 2 to max
c2 = c1 - set(arr) # find all positive elements outside the array
return min(c2)
I just modified the answer by @najeeb-jebreel and now the function gives an optimal solution.
def solution(A):
sorted_set = set(sorted(A))
sol = 1
for x in sorted_set:
if x == sol:
sol += 1
else:
break
return sol
def minpositive(A):
"""Given an list A of N integers,
returns the smallest positive integer (greater than 0)
that does not occur in A in O(n) time complexity
Args:
A: list of integers
Returns:
integer: smallest positive integer
e.g:
A = [1,2,3]
smallest_positive_int = 4
"""
len_nrs_list = len(A)
N = set(range(1, len_nrs_list+2))
return min(N-set(A)) #gets the min value using the N integers
Continuing on from Niroj Shrestha and najeeb-jebreel, added an initial portion to avoid iteration in case of a complete set. Especially important if the array is very large.
def smallest_positive_int(A):
sorted_A = sorted(A)
last_in_sorted_A = sorted_A[-1]
#check if straight continuous list
if len(sorted_A) == last_in_sorted_A:
return last_in_sorted_A + 1
else:
#incomplete list, iterate to find the smallest missing number
sol=1
for x in sorted_A:
if x == sol:
sol += 1
else:
break
return sol
A = [1,2,7,4,5,6]
print(smallest_positive_int(A))
def solution(A):
arr = set(A)
N = set(range(1, 100001))
while N in arr:
N += 1
return min(N - arr)
solution([1, 2, 6, 4])
#returns 3
I reduced the length of set before comparing
a=[1,222,3,4,24,5,6,7,8,9,10,15,2,3,3,11,-1]
#a=[1,2,3,6,3]
def sol(a_array):
a_set=set()
b_set=set()
cnt=1
for i in a_array:
#In order to get the greater performance
#Checking if element is greater than length+1
#then it can't be output( our result in solution)
if i<=len(a) and i >=1:
a_set.add(i) # Adding array element in set
b_set.add(cnt) # Adding iterator in set
cnt=cnt+1
b_set=b_set.difference(a_set)
if((len(b_set)) > 1):
return(min(b_set))
else:
return max(a_set)+1
sol(a)
def solution(A):
nw_A = sorted(set(A))
if all(i < 0 for i in nw_A):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
For better performance if there is a possibility to import numpy package.
def solution(A):
import numpy as np
nw_A = np.unique(np.array(A))
if np.all((nw_A < 0)):
return 1
else:
ans = 1
while ans in nw_A:
ans += 1
if ans not in nw_A:
return ans
This question doesn’t really need another answer, but there is a solution that has not been proposed yet, that I believe to be faster than what’s been presented so far.
As others have pointed out, we know the answer lies in the range [1, len(A)+1]
, inclusively. We can turn that into a set and take the minimum element in the set difference with A. That’s a good O(N) solution since set operations are O(1).
However, we don’t need to use a Python set to store [1, len(A)+1]
, because we’re starting with a dense set. We can use an array instead, which will replace set hashing by list indexing and give us another O(N) solution with a lower constant.
def minpositive(a):
# the "set" of possible answer - values_found[i-1] will tell us whether i is in a
values_found = [False] * (len(a)+1)
# note any values in a in the range [1, len(a)+1] as found
for i in a:
if i > 0 and i <= len(a)+1:
values_found[i-1] = True
# extract the smallest value not found
for i, found in enumerate(values_found):
if not found:
return i+1
We know the final for loop always finds a value that was not marked, because it has one more element than a
, so at least one of its cells was not set to True
.
def check_min(a):
x= max(a)
if x-1 in a:
return x+1
elif x <= 0:
return 1
else:
return x-1
Correct me if i’m wrong but this works for me.
def solution(A):
clone = 1
A.sort()
for itr in range(max(A) + 2):
if itr not in A and itr >= 1:
clone = itr
break
return clone
print(solution([2,1,4,7]))
#returns 3
This solution passes the performance test with a score of 100%
def solution(A):
n = sorted(i for i in set(A) if i > 0) # Remove duplicates and negative numbers
if not n:
return 1
ln = len(n)
for i in range(1, ln + 1):
if i != n[i - 1]:
return i
return ln + 1
def solution(A):
# write your code in Python 3.6
min_num = float("inf")
set_A = set(A)
# finding the smallest number
for num in set_A:
if num < min_num:
min_num = num
# print(min_num)
#if negative make positive
if min_num < 0 or min_num == 0:
min_num = 1
# print(min_num)
# if in set add 1 until not
while min_num in set_A:
min_num += 1
return min_num
Not sure why this is not 100% in correctness. It is 100% performance
I have an easy solution. No need to sort.
def solution(A):
s = set(A)
m = max(A) + 2
for N in range(1, m):
if N not in s:
return N
return 1
Note: It is 100% total score (Correctness & Performance)
def solution(A):
n = 1
for i in A:
if n in A:
n = n+1
else:
return n
return n
def not_in_A(a):
a=sorted(a)
if max(a)<1:
return(1)
for i in range(0,len(a)-1):
if a[i+1]-a[i]>1:
out=a[i]+1
if out==0 or out<1:
continue
return(out)
return(max(a)+1)
mark and then find the first one that didn’t find
nums = [ 6, 4, 3, -5, 0, 2, -7, 1 ]
def check_min(nums):
marks = [-1] * len(nums)
for idx, num in enumerate(nums):
if num >= 0:
marks[num] = idx
for idx, mark in enumerate(marks):
if mark == -1:
return idx
return idx + 1