Finding longest overlapping ranges
Question:
I have ranges in a list like:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
I would like to find the longest ranges that can be constructed from these (when they overlap with each other).
Expected output:
[(1, 70), (75, 92)]
I have a solution, however it is way too complicated, and I am sure there must be an easier solution to this problem
My solution:
def overlap(x, y):
    """Return the integers shared by the closed ranges x and y as a `range`.

    The first and last items of each argument are used as its bounds.
    The result is empty (and therefore falsy) when the ranges do not
    intersect.
    """
    lower = max(x[0], y[0])
    upper = min(x[-1], y[-1])
    return range(lower, upper + 1)
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

# Start with the range that has the smallest lower bound.
beg, end = min(x[0] for x in ranges), 0
for i in ranges:
    if i[0] == beg:
        end = i[1]

# Compare against None explicitly: a plain truthiness test would stop the
# loop as soon as a merged range starts at 0.
while beg is not None:
    # One full pass per input range, so ranges that only become reachable
    # after `end` has grown are still picked up whatever their order.
    for _ in ranges:
        for i in ranges:
            if i[1] > end and overlap(i, [beg, end]):
                end = i[1]
    print(beg, end)
    try:
        # The next merged range starts at the smallest lower bound beyond `end`.
        beg = min(x[0] for x in ranges if x[0] > end)
    except ValueError:
        # min() of an empty sequence: nothing is left to merge.
        beg = None
    else:
        for i in ranges:
            if i[0] == beg:
                end = i[1]
Output:
1 70
75 92
Answers:
You can use the `Counter` container from the `collections` package and then perform set operations on the combinations of `Counter` objects, which you can generate using `itertools`.
Something along the lines of:
import collections
import itertools
import numpy as np

ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

out = []
# The loop variable is deliberately not called `range`, which would shadow
# the builtin for the rest of the module.
for bounds in ranges:
    data = np.arange(bounds[0], bounds[1] + 1)  # every integer in the closed range
    out.append(collections.Counter(data))

for x, y in itertools.combinations(out, 2):  # combinations of two
    if x & y:  # if they overlap
        print(x | y)  # get their union
will get you something close to what you want:
Counter({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1, 64: 1, 65: 1, 66: 1, 67: 1, 68: 1, 69: 1, 70: 1})
Counter({75: 1, 76: 1, 77: 1, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1})
Counter({84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 1, 91: 1, 92: 1})
If you keep doing this for multiple layers, you'll get the supersets you want. See the `collections.Counter` documentation for more on how to use `Counter`.
Using a set to eliminate duplicates, and a sorted list to iterate through, the following should work.
Code:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

# Every integer covered by at least one range. Each range is expanded with
# range(lo, hi), so its upper bound itself is NOT included; the prints below
# compensate for that.
all_nums = sorted({x for r in ranges for x in range(r[0], r[1])})
covered = set(all_nums)  # constant-time membership tests inside the loop

i = all_nums[0]
print(i, end=' ')
while i < all_nums[-1]:
    if i not in covered:
        # i is the first uncovered number, i.e. the excluded upper bound of
        # the merged range that just ended.
        print(i)
        # Jump over the gap to the next covered number and open a new range.
        i = all_nums[all_nums.index(i - 1) + 1]
        print(i, end=' ')
    i += 1
# i is now the last covered number; the real upper bound is one past it.
print(i + 1)
Output:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
1 70
75 92
ranges = [(1, 50), (55, 70), (75, 82), (84, 88), (87, 92)]
1 50
55 70
75 82
84 92
You can use `zip` to split the ranges into a list of start values and a list of end values. If a start value is lower than the previous end value, the two ranges overlap, so remove that start value and the previous end value. Two integer indices track the current position in the low and high lists; the low index is always one higher than the high index.
# Split the ranges into their lower and upper bounds and set up one index
# into each list. Assumes `ranges` is sorted by lower bound.
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
low, high = [list(nums) for nums in zip(*ranges)]
l, h = 1, 0  # l is always h + 1: low[l] is compared with the previous range's high[h]

# Walk the ranges: on an overlap, drop the inner boundary pair; otherwise
# advance both pointers.
while l < len(low) and h < len(high):
    if low[l] < high[h]:
        # Carry the larger upper bound forward so a range nested inside the
        # previous one cannot shrink the merged result.
        high[h + 1] = max(high[h], high[h + 1])
        del low[l]
        del high[h]
    else:
        l += 1
        h += 1

# Zip the remaining bounds back into ranges.
new_ranges = list(zip(low, high))
print(new_ranges)
OUTPUT
[(1, 70), (75, 92)]
Question: Finding longest overlapping range in ranges
ranges1 = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
ranges2 = [(1, 50), (40,45), (49, 70)]
def get_overlapping(ranges):
    """Group consecutive overlapping ranges and return their index spans.

    Args:
        ranges: sequence of (low, high) pairs, expected to be sorted by
            their low value.

    Returns:
        A list of (first_index, last_index) pairs into `ranges`, one per
        group of overlapping ranges. An empty input yields an empty list.
    """
    if not ranges:
        return []

    result = []
    start = 0
    end = ranges[0][1]
    for i, node in enumerate(ranges[1:], 1):
        if end > node[0]:
            # node overlaps the current group: keep the furthest upper bound.
            end = max(end, node[1])
            continue
        # Gap found: close the current group and open a new one at node.
        result.append((start, i - 1))
        end = node[1]
        start = i
    # Close the last group. Using len() rather than the loop variable keeps
    # this valid when the loop body never ran (single-element input).
    result.append((start, len(ranges) - 1))
    return result
Usage:
# Turn each (first_index, last_index) pair back into the bounds of the merged
# range: lower bound of the first member, upper bound of the last member.
for _range in (ranges1, ranges2):
    result = get_overlapping(_range)
    for first_idx, last_idx in result:
        start = _range[first_idx]
        end = _range[last_idx]
        print(start[0], end[1])
    print()
Output:
1 70
75 92
1 70
I think you can sort your input by the start of the ranges, then iterate through them. Each item is either merged into the current range (if its start is less than or equal to the end of the current range), or we yield our current range and begin accumulating a new one:
def overlaps(ranges):
    """Yield merged (start, stop) pairs for the closed intervals in `ranges`.

    The input is sorted first, so it may be given in any order. Intervals
    that touch or overlap the interval being accumulated are folded into it;
    every other interval starts a new accumulated interval. Nothing is
    yielded for an empty input.

    Note: intervals that never merged with anything are yielded too. To
    report only intervals produced by an actual merge, track a flag that is
    set on merge and checked before each yield.
    """
    ordered = sorted(ranges)  # can be skipped if the input is guaranteed sorted
    if not ordered:
        return

    curr_start, curr_stop = ordered[0]
    for start, stop in ordered[1:]:
        if curr_start <= start <= curr_stop:  # intervals are treated as closed
            curr_stop = max(curr_stop, stop)
            continue
        yield curr_start, curr_stop
        curr_start, curr_stop = start, stop
    yield curr_start, curr_stop
print(list(overlaps([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])))
# [(1, 70), (75, 92)]
print(list(overlaps([(20, 30), (5, 10), (1, 7), (12, 21)])))
# [(1, 10), (12, 30)]
I suggest you iterate your ranges only once, but keep in memory the current range being expanded, like so:
def overlaps(r1, r2):
    """Return True when the closed ranges r1 and r2 share at least one point.

    r1 must not start after r2; this is enforced with an assertion.
    """
    assert r1[0] <= r2[0], "Assume ranges sorted by first coordinate"
    r1_ends_inside_r2 = r2[0] <= r1[1] <= r2[1]
    r2_starts_inside_r1 = r1[0] <= r2[0] <= r1[1]
    return r1_ends_inside_r2 or r2_starts_inside_r1
ranges = [(1, 50), (45, 47), (49, 70), (75, 85), (84, 88), (87, 92)]
def fuse_ranges(ranges):
    """Fuse overlapping ranges, keeping only ranges produced by an overlap.

    Args:
        ranges: sequence of (low, high) pairs sorted by their low value.

    Returns:
        A list of [low, high] lists, one per group of two or more
        overlapping input ranges. Input ranges that overlap nothing are
        left out. An empty input yields an empty list.
    """
    if not ranges:
        return []

    output_ranges = []
    curr_r = list(ranges[0])
    curr_overlap = False  # Has the current range absorbed at least one other range?
    # Assuming it is sorted by starting coordinate.
    for r in ranges[1:]:
        if overlaps(curr_r, r):
            curr_overlap = True
            curr_r[1] = max(curr_r[1], r[1])  # Extend the end of the current range.
        else:
            # Only ranges that were actually fused are reported.
            if curr_overlap:
                output_ranges.append(curr_r)
            curr_overlap = False
            curr_r = list(r)
    if curr_overlap:
        output_ranges.append(curr_r)
    return output_ranges
if __name__ == '__main__':
print(fuse_ranges(sorted(ranges, key=lambda r: r[0])))
Which outputs:
[[1, 70], [75, 92]]
Not sure my solution can be much less verbose than yours though…
This could be done using `functools.reduce`:
from functools import reduce
ranges = [(1, 50), (45, 47), (49, 70), (75, 85), (84, 88), (87, 92)]


def reducer(acc, el):
    """Fold one range into the accumulated list of merged ranges.

    If the last accumulated range ends after `el` starts, the two are
    replaced by a single range spanning the min and max of all four bounds;
    otherwise `el` is appended. A new list is returned either way.
    """
    last = acc[-1]
    if last[1] > el[0]:
        bounds = (*last, *el)
        return acc[:-1] + [(min(bounds), max(bounds))]
    return acc + [el]


print(reduce(reducer, ranges[1:], [ranges[0]]))
Gives:
[(1, 70), (75, 92)]
Hard to put into words, but it uses `reduce` to go through the ranges. If the last tuple in the accumulated list and the next range overlap (`if acc[-1][1] > el[0]`), it creates a new range from the `(min, max)` of both and replaces the last tuple with this combined range (`acc[:-1:] + [(min, max)]`); otherwise it simply appends the new range to the end (`acc + [el]`).
Edit: After reviewing other answers, updated to take min/max of the two ranges compared instead of just first and last
Here’s a simple iterative function:
def merge_range(rng):
    """Merge overlapping or touching ranges.

    Args:
        rng: iterable of (low, high) pairs sorted by their low value.

    Returns:
        A list of merged (low, high) tuples.
    """
    starts, ends = [], []
    for x, y in rng:
        if ends and x <= ends[-1]:
            # Overlap with the range being built. Take the max so a range
            # nested inside it cannot pull the upper bound back down.
            ends[-1] = max(ends[-1], y)
            continue
        starts.append(x)
        ends.append(y)
    return list(zip(starts, ends))
Outputs:
merge_range([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])
# [(1, 70), (75, 92)]
merge_range([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92), (99, 102), (105, 111), (150, 155), (152, 160), (154, 180)])
# [(1, 70), (75, 92), (99, 102), (105, 111), (150, 180)]
Most of the already posted answers use loops. Have you considered a recursive solution instead:
def merge(ranges):
    """Recursively merge overlapping ranges in a sorted list of `(a, b)` tuples.

    The first two ranges are compared: if they are disjoint, the first is
    final and the rest is merged recursively; otherwise they are fused into
    one range that is merged with the remainder.
    """
    if not ranges:
        return []
    if len(ranges) == 1:
        return ranges

    (a, b), (c, d) = ranges[:2]
    if b >= c:
        # The ranges touch or overlap, e.g. [(1, 5), (2, 3), ...] or
        # [(1, 5), (2, 10), ...]: fuse them and keep going.
        fused = (a, max(b, d))
        return merge([fused] + ranges[2:])
    # Disjoint, e.g. [(1, 10), (20, 30), ...]: the first range is final.
    return [(a, b)] + merge(ranges[1:])
Example
>>> merge([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])
[(1, 70), (75, 92)]
>>> merge([(1,10), (2,3), (2,3), (8,12)])
[(1, 12)]
>>> merge (sorted([(2,5),(1,3)], key = lambda x: x[0]))
[(1, 5)]
I have ranges in a list like:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
I would like to find the longest ranges that can be constructed from these (when they overlap with each other).
Expected output:
[(1, 70), (75, 92)]
I have a solution, however it is way too complicated, and I am sure there must be an easier solution to this problem
My solution:
def overlap(x, y):
    """Return the integers shared by the closed ranges x and y as a `range`.

    The first and last items of each argument are used as its bounds.
    The result is empty (and therefore falsy) when the ranges do not
    intersect.
    """
    lower = max(x[0], y[0])
    upper = min(x[-1], y[-1])
    return range(lower, upper + 1)
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

# Start with the range that has the smallest lower bound.
beg, end = min(x[0] for x in ranges), 0
for i in ranges:
    if i[0] == beg:
        end = i[1]

# Compare against None explicitly: a plain truthiness test would stop the
# loop as soon as a merged range starts at 0.
while beg is not None:
    # One full pass per input range, so ranges that only become reachable
    # after `end` has grown are still picked up whatever their order.
    for _ in ranges:
        for i in ranges:
            if i[1] > end and overlap(i, [beg, end]):
                end = i[1]
    print(beg, end)
    try:
        # The next merged range starts at the smallest lower bound beyond `end`.
        beg = min(x[0] for x in ranges if x[0] > end)
    except ValueError:
        # min() of an empty sequence: nothing is left to merge.
        beg = None
    else:
        for i in ranges:
            if i[0] == beg:
                end = i[1]
Output:
1 70
75 92
You can use the `Counter` container from the `collections` package and then perform set operations on the combinations of `Counter` objects, which you can generate using `itertools`.
Something along the lines of:
import collections
import itertools
import numpy as np

ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

out = []
# The loop variable is deliberately not called `range`, which would shadow
# the builtin for the rest of the module.
for bounds in ranges:
    data = np.arange(bounds[0], bounds[1] + 1)  # every integer in the closed range
    out.append(collections.Counter(data))

for x, y in itertools.combinations(out, 2):  # combinations of two
    if x & y:  # if they overlap
        print(x | y)  # get their union
will get you something close to what you want:
Counter({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1, 64: 1, 65: 1, 66: 1, 67: 1, 68: 1, 69: 1, 70: 1})
Counter({75: 1, 76: 1, 77: 1, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1})
Counter({84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 1, 91: 1, 92: 1})
If you keep doing this for multiple layers, you'll get the supersets you want. See the `collections.Counter` documentation for more on how to use `Counter`.
Using a set to eliminate duplicates, and a sorted list to iterate through, the following should work.
Code:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]

# Every integer covered by at least one range. Each range is expanded with
# range(lo, hi), so its upper bound itself is NOT included; the prints below
# compensate for that.
all_nums = sorted({x for r in ranges for x in range(r[0], r[1])})
covered = set(all_nums)  # constant-time membership tests inside the loop

i = all_nums[0]
print(i, end=' ')
while i < all_nums[-1]:
    if i not in covered:
        # i is the first uncovered number, i.e. the excluded upper bound of
        # the merged range that just ended.
        print(i)
        # Jump over the gap to the next covered number and open a new range.
        i = all_nums[all_nums.index(i - 1) + 1]
        print(i, end=' ')
    i += 1
# i is now the last covered number; the real upper bound is one past it.
print(i + 1)
Output:
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
1 70
75 92
ranges = [(1, 50), (55, 70), (75, 82), (84, 88), (87, 92)]
1 50
55 70
75 82
84 92
You can use `zip` to split the ranges into a list of start values and a list of end values. If a start value is lower than the previous end value, the two ranges overlap, so remove that start value and the previous end value. Two integer indices track the current position in the low and high lists; the low index is always one higher than the high index.
# Split the ranges into their lower and upper bounds and set up one index
# into each list. Assumes `ranges` is sorted by lower bound.
ranges = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
low, high = [list(nums) for nums in zip(*ranges)]
l, h = 1, 0  # l is always h + 1: low[l] is compared with the previous range's high[h]

# Walk the ranges: on an overlap, drop the inner boundary pair; otherwise
# advance both pointers.
while l < len(low) and h < len(high):
    if low[l] < high[h]:
        # Carry the larger upper bound forward so a range nested inside the
        # previous one cannot shrink the merged result.
        high[h + 1] = max(high[h], high[h + 1])
        del low[l]
        del high[h]
    else:
        l += 1
        h += 1

# Zip the remaining bounds back into ranges.
new_ranges = list(zip(low, high))
print(new_ranges)
OUTPUT
[(1, 70), (75, 92)]
Question: Finding longest overlapping range in ranges
ranges1 = [(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)]
ranges2 = [(1, 50), (40,45), (49, 70)]
def get_overlapping(ranges):
    """Group consecutive overlapping ranges and return their index spans.

    Args:
        ranges: sequence of (low, high) pairs, expected to be sorted by
            their low value.

    Returns:
        A list of (first_index, last_index) pairs into `ranges`, one per
        group of overlapping ranges. An empty input yields an empty list.
    """
    if not ranges:
        return []

    result = []
    start = 0
    end = ranges[0][1]
    for i, node in enumerate(ranges[1:], 1):
        if end > node[0]:
            # node overlaps the current group: keep the furthest upper bound.
            end = max(end, node[1])
            continue
        # Gap found: close the current group and open a new one at node.
        result.append((start, i - 1))
        end = node[1]
        start = i
    # Close the last group. Using len() rather than the loop variable keeps
    # this valid when the loop body never ran (single-element input).
    result.append((start, len(ranges) - 1))
    return result
Usage:
# Turn each (first_index, last_index) pair back into the bounds of the merged
# range: lower bound of the first member, upper bound of the last member.
for _range in (ranges1, ranges2):
    result = get_overlapping(_range)
    for first_idx, last_idx in result:
        start = _range[first_idx]
        end = _range[last_idx]
        print(start[0], end[1])
    print()
Output:
1 70
75 92
1 70
I think you can sort your input by the start of the ranges, then iterate through them. Each item is either merged into the current range (if its start is less than or equal to the end of the current range), or we yield our current range and begin accumulating a new one:
def overlaps(ranges):
    """Yield merged (start, stop) pairs for the closed intervals in `ranges`.

    The input is sorted first, so it may be given in any order. Intervals
    that touch or overlap the interval being accumulated are folded into it;
    every other interval starts a new accumulated interval. Nothing is
    yielded for an empty input.

    Note: intervals that never merged with anything are yielded too. To
    report only intervals produced by an actual merge, track a flag that is
    set on merge and checked before each yield.
    """
    ordered = sorted(ranges)  # can be skipped if the input is guaranteed sorted
    if not ordered:
        return

    curr_start, curr_stop = ordered[0]
    for start, stop in ordered[1:]:
        if curr_start <= start <= curr_stop:  # intervals are treated as closed
            curr_stop = max(curr_stop, stop)
            continue
        yield curr_start, curr_stop
        curr_start, curr_stop = start, stop
    yield curr_start, curr_stop
print(list(overlaps([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])))
# [(1, 70), (75, 92)]
print(list(overlaps([(20, 30), (5, 10), (1, 7), (12, 21)])))
# [(1, 10), (12, 30)]
I suggest you iterate your ranges only once, but keep in memory the current range being expanded, like so:
def overlaps(r1, r2):
    """Return True when the closed ranges r1 and r2 share at least one point.

    r1 must not start after r2; this is enforced with an assertion.
    """
    assert r1[0] <= r2[0], "Assume ranges sorted by first coordinate"
    r1_ends_inside_r2 = r2[0] <= r1[1] <= r2[1]
    r2_starts_inside_r1 = r1[0] <= r2[0] <= r1[1]
    return r1_ends_inside_r2 or r2_starts_inside_r1
ranges = [(1, 50), (45, 47), (49, 70), (75, 85), (84, 88), (87, 92)]
def fuse_ranges(ranges):
    """Fuse overlapping ranges, keeping only ranges produced by an overlap.

    Args:
        ranges: sequence of (low, high) pairs sorted by their low value.

    Returns:
        A list of [low, high] lists, one per group of two or more
        overlapping input ranges. Input ranges that overlap nothing are
        left out. An empty input yields an empty list.
    """
    if not ranges:
        return []

    output_ranges = []
    curr_r = list(ranges[0])
    curr_overlap = False  # Has the current range absorbed at least one other range?
    # Assuming it is sorted by starting coordinate.
    for r in ranges[1:]:
        if overlaps(curr_r, r):
            curr_overlap = True
            curr_r[1] = max(curr_r[1], r[1])  # Extend the end of the current range.
        else:
            # Only ranges that were actually fused are reported.
            if curr_overlap:
                output_ranges.append(curr_r)
            curr_overlap = False
            curr_r = list(r)
    if curr_overlap:
        output_ranges.append(curr_r)
    return output_ranges
if __name__ == '__main__':
print(fuse_ranges(sorted(ranges, key=lambda r: r[0])))
Which outputs:
[[1, 70], [75, 92]]
Not sure my solution can be much less verbose than yours though…
This could be done using `functools.reduce`:
from functools import reduce
ranges = [(1, 50), (45, 47), (49, 70), (75, 85), (84, 88), (87, 92)]


def reducer(acc, el):
    """Fold one range into the accumulated list of merged ranges.

    If the last accumulated range ends after `el` starts, the two are
    replaced by a single range spanning the min and max of all four bounds;
    otherwise `el` is appended. A new list is returned either way.
    """
    last = acc[-1]
    if last[1] > el[0]:
        bounds = (*last, *el)
        return acc[:-1] + [(min(bounds), max(bounds))]
    return acc + [el]


print(reduce(reducer, ranges[1:], [ranges[0]]))
Gives:
[(1, 70), (75, 92)]
Hard to put into words, but it uses `reduce` to go through the ranges. If the last tuple in the accumulated list and the next range overlap (`if acc[-1][1] > el[0]`), it creates a new range from the `(min, max)` of both and replaces the last tuple with this combined range (`acc[:-1:] + [(min, max)]`); otherwise it simply appends the new range to the end (`acc + [el]`).
Edit: After reviewing other answers, updated to take min/max of the two ranges compared instead of just first and last
Here’s a simple iterative function:
def merge_range(rng):
    """Merge overlapping or touching ranges.

    Args:
        rng: iterable of (low, high) pairs sorted by their low value.

    Returns:
        A list of merged (low, high) tuples.
    """
    starts, ends = [], []
    for x, y in rng:
        if ends and x <= ends[-1]:
            # Overlap with the range being built. Take the max so a range
            # nested inside it cannot pull the upper bound back down.
            ends[-1] = max(ends[-1], y)
            continue
        starts.append(x)
        ends.append(y)
    return list(zip(starts, ends))
Outputs:
merge_range([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])
# [(1, 70), (75, 92)]
merge_range([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92), (99, 102), (105, 111), (150, 155), (152, 160), (154, 180)])
# [(1, 70), (75, 92), (99, 102), (105, 111), (150, 180)]
Most of the already posted answers use loops. Have you considered a recursive solution instead:
def merge(ranges):
    """Recursively merge overlapping ranges in a sorted list of `(a, b)` tuples.

    The first two ranges are compared: if they are disjoint, the first is
    final and the rest is merged recursively; otherwise they are fused into
    one range that is merged with the remainder.
    """
    if not ranges:
        return []
    if len(ranges) == 1:
        return ranges

    (a, b), (c, d) = ranges[:2]
    if b >= c:
        # The ranges touch or overlap, e.g. [(1, 5), (2, 3), ...] or
        # [(1, 5), (2, 10), ...]: fuse them and keep going.
        fused = (a, max(b, d))
        return merge([fused] + ranges[2:])
    # Disjoint, e.g. [(1, 10), (20, 30), ...]: the first range is final.
    return [(a, b)] + merge(ranges[1:])
Example
>>> merge([(1, 50), (49, 70), (75, 85), (84, 88), (87, 92)])
[(1, 70), (75, 92)]
>>> merge([(1,10), (2,3), (2,3), (8,12)])
[(1, 12)]
>>> merge (sorted([(2,5),(1,3)], key = lambda x: x[0]))
[(1, 5)]