How to shuffle an array of numbers without two consecutive elements repeating?
Question:
I’m currently trying to get an array of numbers like this one randomly shuffled:
label_array = np.repeat(np.arange(6), 12)
The only constrain is that no consecutive elements of the shuffle must be the same number. For that I’m currently using this code:
# Check if there are any occurrences of two consecutive
# elements being of the same category (same number)
num_occurrences = np.sum(np.diff(label_array) == 0)
# While there are any occurrences of this...
while num_occurrences != 0:
# ...shuffle the array...
np.random.shuffle(label_array)
# ...create a flag for occurrences...
flag = np.hstack(([False], np.diff(label_array) == 0))
flag_array = label_array[flag]
# ...and shuffle them.
np.random.shuffle(flag_array)
# Then re-assign them to the original array...
label_array[flag] = flag_array
# ...and check the number of occurrences again.
num_occurrences = np.sum(np.diff(label_array) == 0)
Although this works for an array of this size, I don’t know if it would work for much bigger arrays. And even so, it may take a lot of time.
So, is there a better way of doing this?
Answers:
Here is a way to do it, for Python >= 3.6, using random.choices, which allows to choose from a population with weights.
The idea is to generate the numbers one by one. Each time we generate a new number, we exclude the previous one by temporarily setting its weight to zero. Then, we decrement the weight of the chosen one.
As @roganjosh duly noted, we have a problem at the end when we are left with more than one instance of the last value – and that can be really frequent, especially with a small number of values and a large number of repeats.
The solution I used is to insert these value back into the list where they don’t create a conflict, with the short send_back
function.
import random
def send_back(value, number, lst):
idx = len(lst)-2
for _ in range(number):
while lst[idx] == value or lst[idx-1] == value:
idx -= 1
lst.insert(idx, value)
def shuffle_without_doubles(nb_values, repeats):
population = list(range(nb_values))
weights = [repeats] * nb_values
out = []
prev = None
for i in range(nb_values * repeats):
if prev is not None:
# remove prev from the list of possible choices
# by turning its weight temporarily to zero
old_weight = weights[prev]
weights[prev] = 0
try:
chosen = random.choices(population, weights)[0]
except IndexError:
# We are here because all of our weights are 0,
# which means that all is left to choose from
# is old_weight times the previous value
send_back(prev, old_weight, out)
break
out.append(chosen)
weights[chosen] -= 1
if prev is not None:
# restore weight
weights[prev] = old_weight
prev = chosen
return out
print(shuffle_without_doubles(6, 12))
[5, 1, 3, 4, 3, 2, 1, 5, 3, 5, 2, 0, 5, 4, 3, 4, 5,
3, 4, 0, 4, 1, 0, 1, 5, 3, 0, 2, 3, 4, 1, 2, 4, 1,
0, 2, 0, 2, 5, 0, 2, 1, 0, 5, 2, 0, 5, 0, 3, 2, 1,
2, 1, 5, 1, 3, 5, 4, 2, 4, 0, 4, 2, 4, 0, 1, 3, 4,
5, 3, 1, 3]
Some crude timing: it takes about 30 seconds to generate (shuffle_without_doubles(600, 1200))
, so 720000 values.
May not be technically the best answer, hopefully it suffices for your requirements.
import numpy as np
def generate_random_array(block_length, block_count):
for blocks in range(0, block_count):
nums = np.arange(block_length)
np.random.shuffle(nums)
try:
if nums[0] == randoms_array [-1]:
nums[0], nums[-1] = nums[-1], nums[0]
except NameError:
randoms_array = []
randoms_array.extend(nums)
return randoms_array
generate_random_array(block_length=1000, block_count=1000)
I came from Creating a list without back-to-back repetitions from multiple repeating elements (referred as "problem A") as I organise my notes and there was no correct answer under "problem A" nor in the current one. Also these two problems seems different because problem A requires same elements.
Basically what you asked is same as an algorithm problem (link) where the randomness is not required. But when you have like almost half of all numbers same, the result can only be like "ABACADAEA…", where "ABCDE" are numbers. In the most voted answer to this problem, a priority queue is used so the time complexity is O(n log m)
, where n is the length of the output and m
is the count of option.
As for this problem A easier way is to use itertools.permutations
and randomly select some of them with different beginning and ending so it looks like "random"
I write draft code here and it works.
from itertools import permutations
from random import choice
def no_dup_shuffle(ele_count: int, repeat: int):
"""
Return a shuffle of `ele_count` elements repeating `repeat` times.
"""
p = permutations(range(ele_count))
res = []
curr = last = [-1] # -1 is a dummy value for the first `extend`
for _ in range(repeat):
while curr[0] == last[-1]:
curr = choice(list(p))
res.extend(curr)
last = curr
return res
def test_no_dup_shuffle(count, rep):
r = no_dup_shuffle(count, rep)
assert len(r) == count * rep # check result length
assert len(set(r)) == count # check all elements are used and in `range(count)`
for i, n in enumerate(r): # check no duplicate
assert n != r[i - 1]
print(r)
if __name__ == "__main__":
test_no_dup_shuffle(5, 3)
test_no_dup_shuffle(3, 17)
I’m currently trying to get an array of numbers like this one randomly shuffled:
label_array = np.repeat(np.arange(6), 12)
The only constrain is that no consecutive elements of the shuffle must be the same number. For that I’m currently using this code:
# Check if there are any occurrences of two consecutive
# elements being of the same category (same number)
num_occurrences = np.sum(np.diff(label_array) == 0)
# While there are any occurrences of this...
while num_occurrences != 0:
# ...shuffle the array...
np.random.shuffle(label_array)
# ...create a flag for occurrences...
flag = np.hstack(([False], np.diff(label_array) == 0))
flag_array = label_array[flag]
# ...and shuffle them.
np.random.shuffle(flag_array)
# Then re-assign them to the original array...
label_array[flag] = flag_array
# ...and check the number of occurrences again.
num_occurrences = np.sum(np.diff(label_array) == 0)
Although this works for an array of this size, I don’t know if it would work for much bigger arrays. And even so, it may take a lot of time.
So, is there a better way of doing this?
Here is a way to do it, for Python >= 3.6, using random.choices, which allows to choose from a population with weights.
The idea is to generate the numbers one by one. Each time we generate a new number, we exclude the previous one by temporarily setting its weight to zero. Then, we decrement the weight of the chosen one.
As @roganjosh duly noted, we have a problem at the end when we are left with more than one instance of the last value – and that can be really frequent, especially with a small number of values and a large number of repeats.
The solution I used is to insert these value back into the list where they don’t create a conflict, with the short send_back
function.
import random
def send_back(value, number, lst):
idx = len(lst)-2
for _ in range(number):
while lst[idx] == value or lst[idx-1] == value:
idx -= 1
lst.insert(idx, value)
def shuffle_without_doubles(nb_values, repeats):
population = list(range(nb_values))
weights = [repeats] * nb_values
out = []
prev = None
for i in range(nb_values * repeats):
if prev is not None:
# remove prev from the list of possible choices
# by turning its weight temporarily to zero
old_weight = weights[prev]
weights[prev] = 0
try:
chosen = random.choices(population, weights)[0]
except IndexError:
# We are here because all of our weights are 0,
# which means that all is left to choose from
# is old_weight times the previous value
send_back(prev, old_weight, out)
break
out.append(chosen)
weights[chosen] -= 1
if prev is not None:
# restore weight
weights[prev] = old_weight
prev = chosen
return out
print(shuffle_without_doubles(6, 12))
[5, 1, 3, 4, 3, 2, 1, 5, 3, 5, 2, 0, 5, 4, 3, 4, 5,
3, 4, 0, 4, 1, 0, 1, 5, 3, 0, 2, 3, 4, 1, 2, 4, 1,
0, 2, 0, 2, 5, 0, 2, 1, 0, 5, 2, 0, 5, 0, 3, 2, 1,
2, 1, 5, 1, 3, 5, 4, 2, 4, 0, 4, 2, 4, 0, 1, 3, 4,
5, 3, 1, 3]
Some crude timing: it takes about 30 seconds to generate (shuffle_without_doubles(600, 1200))
, so 720000 values.
May not be technically the best answer, hopefully it suffices for your requirements.
import numpy as np
def generate_random_array(block_length, block_count):
for blocks in range(0, block_count):
nums = np.arange(block_length)
np.random.shuffle(nums)
try:
if nums[0] == randoms_array [-1]:
nums[0], nums[-1] = nums[-1], nums[0]
except NameError:
randoms_array = []
randoms_array.extend(nums)
return randoms_array
generate_random_array(block_length=1000, block_count=1000)
I came from Creating a list without back-to-back repetitions from multiple repeating elements (referred as "problem A") as I organise my notes and there was no correct answer under "problem A" nor in the current one. Also these two problems seems different because problem A requires same elements.
Basically what you asked is same as an algorithm problem (link) where the randomness is not required. But when you have like almost half of all numbers same, the result can only be like "ABACADAEA…", where "ABCDE" are numbers. In the most voted answer to this problem, a priority queue is used so the time complexity is O(n log m)
, where n is the length of the output and m
is the count of option.
As for this problem A easier way is to use itertools.permutations
and randomly select some of them with different beginning and ending so it looks like "random"
I write draft code here and it works.
from itertools import permutations
from random import choice
def no_dup_shuffle(ele_count: int, repeat: int):
"""
Return a shuffle of `ele_count` elements repeating `repeat` times.
"""
p = permutations(range(ele_count))
res = []
curr = last = [-1] # -1 is a dummy value for the first `extend`
for _ in range(repeat):
while curr[0] == last[-1]:
curr = choice(list(p))
res.extend(curr)
last = curr
return res
def test_no_dup_shuffle(count, rep):
r = no_dup_shuffle(count, rep)
assert len(r) == count * rep # check result length
assert len(set(r)) == count # check all elements are used and in `range(count)`
for i, n in enumerate(r): # check no duplicate
assert n != r[i - 1]
print(r)
if __name__ == "__main__":
test_no_dup_shuffle(5, 3)
test_no_dup_shuffle(3, 17)