In Python, efficiently determine if two lists are shifted copies of one another

Question:

What’s the most efficient (in time) way of checking if two relatively short (about 3-8 elements) lists are shifted copies of one another? And if so, determine and return the offset?

Here is the example code and output I’d like:

>>> def is_shifted_copy(list_one, list_two):
>>>     # TODO
>>>
>>> is_shifted_copy([1, 2, 3], [1, 2, 3])
0
>>> is_shifted_copy([1, 2, 3], [3, 1, 2])
1
>>> is_shifted_copy([1, 2, 3], [2, 3, 1])
2
>>> is_shifted_copy([1, 2, 3], [3, 2, 1])
None
>>> is_shifted_copy([1, 2, 3], [1])
None
>>> is_shifted_copy([1, 1, 2], [2, 1, 1])
1

Lists may have duplicate entries. If more than one offset is valid, return any offset.

Asked By: devtk

||

Answers:

here’s a simple iterator version that does the job in 2n iterations (n being the length of list)

import itertools

def is_shifted_copy(list1, list2):

    if len(list1) != len(list2):
        return False

    iterator = iter(list2)

    for i, item in enumerate(itertools.chain(list1, list1)):
        try:
            if item == iterator.next():
                continue
            else:
                iterator = iter(list2)
        except StopIteration:
            return i - len(list2)

    else:
        return False


print is_shifted_copy([1, 2, 3], [1, 2, 3]) #0
print is_shifted_copy([1, 2, 3], [3, 1, 2]) #2
print is_shifted_copy([1, 2, 3], [3, 2, 1]) #False
print is_shifted_copy([1, 2, 3], [2, 3, 1]) #1
print is_shifted_copy([1, 1, 2], [2, 1, 1]) #2
print is_shifted_copy([1, 2, 3], [1]) #False
print is_shifted_copy([1, 2, 1], [2, 1, 1]) #1
print is_shifted_copy([1, 1, 1], [1, 1, 1]) #0

and from your specification,

shouldn’t is_shifted_copy([1, 1, 2], [2, 1, 1]) return 2?

Answered By: thkang

Here is a solution based on indexes and slicing:

>>> def is_shifted_copy(l1, l2):
    try:
        return [l1[-i:] + l1[:-i] for i in range(len(l1))].index(l2)
    except ValueError:
        return None

The result is as expected:

>>> is_shifted_copy([1, 2, 3], [1, 2, 3])
0
>>> is_shifted_copy([1, 2, 3], [3, 1, 2])
1
>>> is_shifted_copy([1, 2, 3], [2, 3, 1])
2
>>> is_shifted_copy([1, 2, 3], [2, 1, 3])
None
Answered By: Emmanuel

Below is a modified version of NPE’s solution, which checks for all possible match positions. It compares favorably to thkang’s (and ecatmur’s) clever iterator solutions:

import itertools as IT

def is_shifted_copy_thkang(list1, list2):
    N = len(list1)
    if N != len(list2):
        return None
    elif N == 0:
        return 0
    next_item = iter(list2).next
    for i, item in enumerate(IT.chain(list1, list1)):
        try:
            if item == next_item():
                continue
            else:
                next_item = iter(list2).next
        except StopIteration:
            return -i % N
    else:
        return None

def is_shifted_copy_NPE(list1, list2):
    N = len(list1)
    if N != len(list2):
        return None
    elif N == 0:
        return 0

    pos = -1
    first = list1[0]
    while pos < N:
        try:
            pos = list2.index(first, pos+1)
        except ValueError:
            break
        if (list2 + list2)[pos:pos+N] == list1:
            return pos
    return None

def is_shifted_copy_ecatmur(l1, l2):
    l1l1 = l1 * 2
    n = len(l1)
    return next((-i % n for i in range(n) if l1l1[i:i + n] == l2), None)


tests = [
    # ([], [], 0),    
    ([1, 2, 3], [1, 2, 3], 0),
    ([1, 2, 3], [3, 1, 2], 1),
    ([1, 2, 3], [2, 3, 1], 2),
    ([1, 2, 3], [3, 2, 1], None),
    ([1, 2, 3], [1], None),
    ([1, 1, 2], [2, 1, 1], 1),
    ([1,2,3,1,3,2], [1,3,2,1,2,3], 3)
    ]

for list1, list2, answer in tests:
    print(list1, list2, answer)
    assert is_shifted_copy_thkang(list1, list2) == answer
    assert is_shifted_copy_NPE(list1, list2) == answer    
    assert is_shifted_copy_ecatmur(list1, list2) == answer        

In [378]: %timeit is_shifted_copy_thkang([1, 2, 3], [3, 1, 2])
100000 loops, best of 3: 3.5 us per loop

In [377]: %timeit is_shifted_copy_ecatmur([1, 2, 3], [3, 1, 2])
100000 loops, best of 3: 2.37 us per loop

In [379]: %timeit is_shifted_copy_NPE([1, 2, 3], [3, 1, 2])
1000000 loops, best of 3: 1.13 us per loop

Note: I changed the return value in is_shifted_copy_thkang and is_shifted_copy_ecatmur so that all three versions would pass the tests in the original post.

I benchmarked the functions with and without the change and found it does not significantly affect the performance of the functions.

For example, with return i:

In [384]: %timeit is_shifted_copy_thkang([1, 2, 3], [3, 1, 2])
100000 loops, best of 3: 3.38 us per loop

With return -i % N:

In [378]: %timeit is_shifted_copy_thkang([1, 2, 3], [3, 1, 2])
100000 loops, best of 3: 3.5 us per loop
Answered By: unutbu

Searching two copies of the first list allows us to avoid performing excessive concatenation:

def is_shifted_copy(l1, l2):
    l1l1 = l1 * 2
    n = len(l1)
    return next((i for i in range(n) if l1l1[i:i + n] == l2), None)
Answered By: ecatmur

Flawed solution

Unfortunately, the solution by thkang and the modified version from unutbu fail for surprisingly simple inputs.

For example, we (correctly) get an integer result for the lists [1, 2, 1] and [1, 1, 2]:

>>> is_shifted_copy([1, 2, 1], [1, 1, 2])
2

However, when swapping the arguments, we get the (incorrect) result False:

>>> is_shifted_copy([1, 1, 2], [1, 2, 1])
False

Similarly, other lists that are shifted copies aren’t treated correctly:

>>> is_shifted_copy([1, 2, 2], [2, 1, 2])
False
>>> is_shifted_copy([1, 1, 2, 1], [1, 2, 1, 1])
False
>>> is_shifted_copy([1, 2, 1, 3, 3], [3, 1, 2, 1, 3])
False

To understand the source of this problem, let me reproduce the current version of thkang’s solution (with a modified call to next for Python 3):

import itertools

def is_shifted_copy(list1, list2):
    if len(list1) != len(list2):
        return False

    iterator = iter(list2)

    for i, item in enumerate(itertools.chain(list1, list1)):
        try:
            if item == next(iterator):
                continue
            else:
                iterator = iter(list2)  # Reset iterator
        except StopIteration:
            return i - len(list2)
    else:
        return False

Now, the following happens for a call like is_shifted_copy([1, 2, 2], [2, 1, 2]):

i item next(iterator) Result
0 1 2^ reset iterator
1 2 2^ continue
2 2 1 reset iterator
3 1 2^ reset iterator
4 2 2^ continue
5 2 1 reset iterator, return False

^ First element of list2.

As we can see, the repeated elements in the input lists cause the iterator itertools.chain(list1, list1) to be exhausted before we even have the chance to get to the final element of list2.

Possible fix

If we insist on using iterators (to prevent copying of potentially large input lists, for example), we’d have to ensure the iterator for the first list isn’t exhausted before we can make a comparison with the elements from the second list. The only approach I can currently think of that guarantees a non-exhausted first iterator is creating a new iterator for all possible shifted versions of list1:

import itertools

def is_shifted_copy(list1, list2):
    length = len(list1)

    if len(list2) != length:
        return

    for i in range(length):
        iterator1 = itertools.islice(itertools.cycle(list1), i, i + length + 1)
        iterator2 = iter(list2)

        for item in iterator1:
            try:
                if item != next(iterator2):
                    break
            except StopIteration:
                return -i % length

    return None
>>> is_shifted_copy([1, 2, 3], [1, 2, 3])
0
>>> is_shifted_copy([1, 2, 3], [3, 1, 2])
1
>>> is_shifted_copy([1, 2, 3], [2, 3, 1])
2
>>> is_shifted_copy([1, 2, 3], [3, 2, 1])
None
>>> is_shifted_copy([1, 2, 3], [1])
None
>>> is_shifted_copy([1, 1, 2], [2, 1, 1])
1
>>> is_shifted_copy([1, 2, 1], [1, 1, 2])
1
>>> is_shifted_copy([1, 1, 2], [1, 2, 1])
2
>>> is_shifted_copy([1, 2, 2], [2, 1, 2])
1
>>> is_shifted_copy([1, 1, 2, 1], [1, 2, 1, 1])
3
>>> is_shifted_copy([1, 2, 1, 3, 3], [3, 1, 2, 1, 3])
1

At best, the above algorithm finishes in O(n) steps. The algorithm performs worst, namely O(n^2) in time, for input lists with mostly duplicate elements.

Answered By: Maurits Silvis
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.