Create all possible combinations of lists of different sizes in numpy
Question:
I want to create a numpy array with all possible combinations of items from multiple lists of different sizes:
a = [1, 2]
b = [3, 4]
c = [5, 6, 7]
d = [8, 9, 10]
In each combination, I want 2 elements. I don’t want any duplicates, and I don’t want items from the same list to mix together.
I can get all such combinations with 3 elements with np.array(np.meshgrid(a, b, c, d)).T.reshape(-1,3)
but I need pairs, not triplets. Doing np.array(np.meshgrid(a, b, c, d)).T.reshape(-1,2)
doesn’t work because it just cuts off one column of the original array.
Any ideas on how to achieve this?
Answers:
Itertools is great for this. The first thing you want to do is gather your lists into a single iterable (a list of lists). The next step is to get all combinations of two lists.
from itertools import combinations, product
# Four source lists of different sizes.
a, b = [1, 2], [3, 4]
c, d = [5, 6, 7], [8, 9, 10]

# Gather them into one list of lists so they can be paired up.
total = [a, b, c, d]

# Print every unordered pair of distinct lists (6 pairs for 4 lists).
list_pairs = combinations(total, 2)
for item in list_pairs:
    print(item)
which returns
([1, 2], [3, 4])
([1, 2], [5, 6, 7])
([1, 2], [8, 9, 10])
([3, 4], [5, 6, 7])
([3, 4], [8, 9, 10])
([5, 6, 7], [8, 9, 10])
Then you can simply iterate over the individual lists, as below:
from itertools import combinations
# Source lists; items from the same list must never be paired together.
a, b, c, d = [1, 2], [3, 4], [5, 6, 7], [8, 9, 10]
total = [a, b, c, d]

# For each pair of distinct lists, print every (left item, right item) pair.
for item in combinations(total, 2):
    first_list, second_list = item
    for sub_item in first_list:
        for second_sub_item in second_list:
            print(sub_item, second_sub_item)
print out is
1 3
1 4
2 3
2 4
1 5
1 6
1 7
2 5
2 6
2 7
1 8
1 9
1 10
2 8
2 9
2 10
3 5
3 6
3 7
4 5
4 6
4 7
3 8
3 9
3 10
4 8
4 9
4 10
5 8
5 9
5 10
6 8
6 9
6 10
7 8
7 9
7 10
Similar to Olvin Roght’s comment, but if you put your sublists in a list you can do:
>>> ls = [[1,2],[3,4],[5,6,7],[8,9,10]]
>>> [item for cmb in combinations(ls, 2) for item in product(*cmb)]
[(1, 3), (1, 4), (2, 3), (2, 4), (1, 5), (1, 6), (1, 7), (2, 5), (2, 6), (2, 7), (1, 8), (1, 9), (1, 10), (2, 8), (2, 9), (2, 10), (3, 5), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (3, 8), (3, 9), (3, 10), (4, 8), (4, 9), (4, 10), (5, 8), (5, 9), (5, 10), (6, 8), (6, 9), (6, 10), (7, 8), (7, 9), (7, 10)]
Here’s an alternative if you want to use only numpy without using itertools.
def all_combinations(arrays):
    """
    Return every pair made of two items taken from two different lists.

    :param arrays: sequence (e.g. tuple) of 1D numeric lists or arrays.
        Items coming from the same list are never paired together.
    :return: np.array of shape (len_out, 2).
        Each row is ``(x, y)`` where ``x`` comes from an earlier list of
        ``arrays`` than ``y``. ``len_out`` is the sum of
        ``len(arrays[i]) * len(arrays[j])`` over all ``i < j``.
        Integer inputs give an integer array; float inputs give a float
        array instead of being truncated to integers. With fewer than two
        lists the result is an empty array of shape (0, 2).
    """
    arrays = [np.asarray(elt) for elt in arrays]
    if not arrays:
        # Nothing to pair: return an empty result instead of failing.
        return np.empty((0, 2), dtype=int)

    # Common dtype of the inputs, never narrower than the default integer.
    # Empty lists are skipped because np.asarray([]) is float64 and would
    # otherwise turn an all-integer result into floats.
    sized = [arr for arr in arrays if arr.size]
    out_dtype = np.result_type(np.dtype(int), *sized)
    arrays = [arr.astype(out_dtype, copy=False) for arr in arrays]

    # (sum of lengths)**2 minus the sum of squared lengths counts every
    # cross-list product twice, hence the division by two.
    lengths = [len(arr) for arr in arrays]
    len_out = (sum(lengths) ** 2 - sum(n * n for n in lengths)) // 2
    out = np.empty((len_out, 2), dtype=out_dtype)

    # ``seen`` holds all items of the lists processed so far; each new list
    # is paired with all of them, then appended to ``seen``.
    seen, start = arrays[0], 0
    for elt in arrays[1:]:
        block = np.asarray(np.meshgrid(seen, elt)).T.reshape(-1, 2)
        stop = start + len(block)
        out[start:stop] = block
        start = stop
        seen = np.concatenate((seen, elt))
    return out
Example:
>>> arrays = ([1, 2], [3, 4], [5, 6, 7], [8, 9, 10])
>>> all_combinations(arrays)
[[ 1 3]
[ 1 4]
[ 2 3]
...
...
[ 7 8]
[ 7 9]
[ 7 10]]
I want to create a numpy array with all possible combinations of items from multiple lists of different sizes:
a = [1, 2]
b = [3, 4]
c = [5, 6, 7]
d = [8, 9, 10]
In each combination, I want 2 elements. I don’t want any duplicates, and I don’t want items from the same list to mix together.
I can get all such combinations with 3 elements with np.array(np.meshgrid(a, b, c, d)).T.reshape(-1,3)
but I need pairs, not triplets. Doing np.array(np.meshgrid(a, b, c, d)).T.reshape(-1,2)
doesn’t work because it just cuts off one column of the original array.
Any ideas on how to achieve this?
Itertools is great for this. The first thing you want to do is gather your lists into a single iterable (a list of lists). The next step is to get all combinations of two lists.
from itertools import combinations, product
# Input lists of different sizes.
a = [1, 2]
b = [3, 4]
c = [5, 6, 7]
d = [8, 9, 10]

# One list of lists, so that itertools can pick two lists at a time.
total = [a, b, c, d]

# Each printed item is a 2-tuple holding two distinct lists.
for item in combinations(total, r=2):
    print(item)
which returns
([1, 2], [3, 4])
([1, 2], [5, 6, 7])
([1, 2], [8, 9, 10])
([3, 4], [5, 6, 7])
([3, 4], [8, 9, 10])
([5, 6, 7], [8, 9, 10])
Then you can simply iterate over the individual lists, as below:
from itertools import combinations
# Input lists; pairs are only formed across two different lists.
a, b = [1, 2], [3, 4]
c, d = [5, 6, 7], [8, 9, 10]
total = [a, b, c, d]

# Walk over each pair of lists and print their element-wise cross product.
for item in combinations(total, 2):
    left_items = item[0]
    right_items = item[1]
    for sub_item in left_items:
        for second_sub_item in right_items:
            print(sub_item, second_sub_item)
print out is
1 3
1 4
2 3
2 4
1 5
1 6
1 7
2 5
2 6
2 7
1 8
1 9
1 10
2 8
2 9
2 10
3 5
3 6
3 7
4 5
4 6
4 7
3 8
3 9
3 10
4 8
4 9
4 10
5 8
5 9
5 10
6 8
6 9
6 10
7 8
7 9
7 10
Similar to Olvin Roght’s comment, but if you put your sublists in a list you can do:
>>> ls = [[1,2],[3,4],[5,6,7],[8,9,10]]
>>> [item for cmb in combinations(ls, 2) for item in product(*cmb)]
[(1, 3), (1, 4), (2, 3), (2, 4), (1, 5), (1, 6), (1, 7), (2, 5), (2, 6), (2, 7), (1, 8), (1, 9), (1, 10), (2, 8), (2, 9), (2, 10), (3, 5), (3, 6), (3, 7), (4, 5), (4, 6), (4, 7), (3, 8), (3, 9), (3, 10), (4, 8), (4, 9), (4, 10), (5, 8), (5, 9), (5, 10), (6, 8), (6, 9), (6, 10), (7, 8), (7, 9), (7, 10)]
Here’s an alternative if you want to use only numpy without using itertools.
def all_combinations(arrays):
    """
    Return every pair made of two items taken from two different lists.

    :param arrays: sequence (e.g. tuple) of 1D numeric lists or arrays.
        Items coming from the same list are never paired together.
    :return: np.array of shape (len_out, 2).
        Each row is ``(x, y)`` where ``x`` comes from an earlier list of
        ``arrays`` than ``y``. ``len_out`` is the sum of
        ``len(arrays[i]) * len(arrays[j])`` over all ``i < j``.
        Integer inputs give an integer array; float inputs give a float
        array instead of being truncated to integers. With fewer than two
        lists the result is an empty array of shape (0, 2).
    """
    arrays = [np.asarray(elt) for elt in arrays]
    if not arrays:
        # Nothing to pair: return an empty result instead of failing.
        return np.empty((0, 2), dtype=int)

    # Common dtype of the inputs, never narrower than the default integer.
    # Empty lists are skipped because np.asarray([]) is float64 and would
    # otherwise turn an all-integer result into floats.
    sized = [arr for arr in arrays if arr.size]
    out_dtype = np.result_type(np.dtype(int), *sized)
    arrays = [arr.astype(out_dtype, copy=False) for arr in arrays]

    # (sum of lengths)**2 minus the sum of squared lengths counts every
    # cross-list product twice, hence the division by two.
    lengths = [len(arr) for arr in arrays]
    len_out = (sum(lengths) ** 2 - sum(n * n for n in lengths)) // 2
    out = np.empty((len_out, 2), dtype=out_dtype)

    # ``seen`` holds all items of the lists processed so far; each new list
    # is paired with all of them, then appended to ``seen``.
    seen, start = arrays[0], 0
    for elt in arrays[1:]:
        block = np.asarray(np.meshgrid(seen, elt)).T.reshape(-1, 2)
        stop = start + len(block)
        out[start:stop] = block
        start = stop
        seen = np.concatenate((seen, elt))
    return out
Example:
>>> arrays = ([1, 2], [3, 4], [5, 6, 7], [8, 9, 10])
>>> all_combinations(arrays)
[[ 1 3]
[ 1 4]
[ 2 3]
...
...
[ 7 8]
[ 7 9]
[ 7 10]]