Page 1 of 1

Compare a segment of an array (with wildcards)

Posted: Sun Oct 01, 2006 7:10 am
by Chris Corbyn
I'm trying to check if a slice of an array exists in a (possibly) larger array. The slice we look for can contain wilcard values represented by "*". A wildcard constitutes NONE or MORE values so if it's midway in the sequence, or at the start we can keep skipping non-matching values until we find a match. If the wildcard is at the end it can simply be disregarded since it's not worth checking.

I've been playing around with this all night and still can't quite get it right so I'm throwing it out here in case on of you can. Try to avoid using PHP-specific functions since this is needed in JavaScript and PHP. I can translate it if needs be though :)

Here's a test case the clarify what I mean:

Code: Select all

function array_sequence_compare($wanted_sequence=array(), $master_sequence=array())
{
    //
}

class TestOfSequenceMatching extends UnitTestCase
{
    //A sequence to check against
    protected $masterArray = array('x', 'y', 'x', 'x', 'x', 'z', 'a', 'a', 'b', 'a', 'c', 'd', 'e', 'f', 'f', 'f', 'g');
    
    public function testCanFindStraightForwardSequences()
    {
        $this->assertTrue(array_sequence_compare(array('x', 'y', 'x', 'x', 'x', 'z', 'a', 'a', 'b', 'a', 'c', 'd', 'e', 'f', 'f', 'f', 'g'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('a', 'a', 'b'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('x'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('g'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('d', 'e', 'f', 'f', 'f', 'g'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('b', 'a'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('z', 'a', 'a', 'b', 'a'), $this->masterArray));
    }
    
    public function testReturnsFalseWhenStraightForwardSequenceNotFound()
    {
        $this->assertFalse(array_sequence_compare(array('a', 'g'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('h'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('c', 'd', 'e', 'f', 'g'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('x', 'x', 'a'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('b', 'a', 'c', 'd', 'z'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('x', 'f'), $this->masterArray));
    }

    public function testReturnsTrueForEmptyArray()
    {
        $this->assertTrue(array_sequence_compare(array(), $this->masterArray));
    }

    public function testReturnsTrueForWilcardsAtStart()
    {
        $this->assertTrue(array_sequence_compare(array('*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('*', 'd', 'e', 'f'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('*', 'x', 'y', 'x'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('*', 'a', 'b', 'a', 'c', 'd'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('*', 'x', 'y', 'x', 'x', 'x', 'z', 'a', 'a', 'b', 'a', 'c', 'd', 'e', 'f', 'f', 'f', 'g'), $this->masterArray));
    }

    public function testCanSkipZeroOrMoreTimesForWilcards()
    {
        $this->assertTrue(array_sequence_compare(array('x', '*', 'a'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('d', 'e', 'f', '*', 'g'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('x', '*', 'g'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('c', 'd', '*', 'e'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('x', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', 'x'), $this->masterArray));
    }
    
    public function testReturnsFalseWhenWildcardNotInSuitablePosition()
    {
        $this->assertFalse(array_sequence_compare(array('e', '*', 'x'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('f', 'f', '*', 'a'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('b', 'a', 'c', 'd', '*', 'z'), $this->masterArray));
        $this->assertFalse(array_sequence_compare(array('a', '*', 'c', '*', 'z'), $this->masterArray));
    }

    public function testReturnsTrueWhenWilcardAtEndOfAnyValidSequence()
    {
        $this->assertTrue(array_sequence_compare(array('x', 'y', 'x', 'x', 'x', 'z', 'a', 'a', 'b', 'a', 'c', 'd', 'e', 'f', 'f', 'f', 'g', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('a', 'a', 'b', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('x', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('g', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('d', 'e', 'f', 'f', 'f', 'g', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('b', 'a', '*'), $this->masterArray));
        $this->assertTrue(array_sequence_compare(array('z', 'a', 'a', 'b', 'a', '*'), $this->masterArray));
    }
}
Kudos to anyone who can manage this :? It seemed simple upon first thought but it's the wildacards that cause headaches :(

EDIT | Test case fixed :oops:

Posted: Sun Oct 01, 2006 7:33 am
by Mordred
For starters, compress such sequences of wildcards:
array('x', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', 'x') <--> array('x', '*', 'x'), and remove trailing wildcards.

If your "needle" arrays can contain more than one wildcard, then you must use recursion for matching them, where each wildcard accounts for one level of recursion. Is not using recursion the source of your troubles? Otherwise the algo is pretty straightforward and has no apparent pitfalls:

Try to match the i-th needle char with the j-th haystack char
1. If it doesn't match, i=0, j=j+1
2. If it isn't a wildcard, i=i+1, j=j+1, continue
3. If it'a a wildcard, it can "eat" all haystack chars from j onward, so we loop them in an array:
for jj = j to count(haystack)
MatchRecursively(the needle from i+1 onward, the haystack from jj onward)


There are smarter algos for substring (which is effectively what you need, albeit with arbitrary elements instead of chars) matching I believe, but the simplest one should suffice unless your haystack is really big.

Posted: Sun Oct 01, 2006 7:43 am
by Chris Corbyn
You're right. I wasn't using recursion, I was just nesting a loop over the needle inside a loop over the haystack and repeatedly trying to find corresponding sequences (ignoring '*').

I was compressing multiple wildcards -- well actually, I was checking if the current element is a wildacrd and then if the next one is a wildcard the loop continues without executing the logic inside it. It was all getting a little over-complex 8O

I'll give a recursive shot a go. Cheers :)

Posted: Sun Oct 01, 2006 9:15 am
by Weirdan
Could your arrays contain non-scalar types?

Posted: Sun Oct 01, 2006 11:30 am
by Ollie Saunders
Out of interest what do you need this for?

Posted: Sun Oct 01, 2006 12:20 pm
by Chris Corbyn
Weirdan wrote:Could your arrays contain non-scalar types?
No, they will only be 1-dimensional arrays containing nothing but string values.
ole wrote:Out of interest what do you need this for?
A mock object library I'm adding something to. I'm adding an assertCallSequence() method so you can assert something like:

Code: Select all

$mock->assertCallSequence(array('methodOne', 'methodTwo', '*', 'methodOne'), 'Some comment');
I think I'm getting there. I just went out for lunch so I'll have to get my head back into the code again.

Posted: Sun Oct 01, 2006 12:30 pm
by Ollie Saunders
If you fancy a massive challenge you could write a regular expression engine for arrays. So something like this:

Code: Select all

$array = array('foo', 'a', 'b', 'B', 'a', 10, 8, 'bar');
// comma in pattern used to separate element literals
$array = areg_replace("/^['a','b']+10,8/i", array('woo!'), $array);

Code: Select all

$this->assertEqual($array, array('foo', 'woo!', 'bar');
Now that would be fun :)

Posted: Sun Oct 01, 2006 1:33 pm
by Chris Corbyn
OK here it is, using Mordred's logic (roughly). I've done it in JS first but a PHP port should be easy... I'll see if I can work it into SimpleTest's mock library:

Code: Select all

/**
 * Compare sub_sequences of arrays, with wilcard abilities
 * @param {array} needle
 * @param {array} haystack
 * @param {boolean} dont_compress_wildcards
 * @returns {boolean}
 * @private
 */
function __JSTSequenceCompare(needle, haystack, noCompress)
{
	//Nothing to check, this is always valid
	if (!needle.length) return true;
	
	//Turns to 1 if starting on a wildcard
	var needlePos = 0;
	
	if (!noCompress) //For optimization
	{
		var cNeedle = new Array();
		var lastEl = false;
		//Compress duplicate wildcards
		for (var x in needle)
		{
			if (needle[x] != '*') cNeedle.push(needle[x]); //Not a wildcard, add to stack
			else if (lastEl != '*') cNeedle.push(needle[x]); //Wilcard, only stack if last item is not a wildcard
			
			lastEl = needle[x]; //Store last item
		}
		needle = cNeedle; //Replace needle with compressed needle
	}
	
	//At end of sequence assertion and the element is just a wildcard -- nothing more to do
	if ((needle[needlePos] == '*') && needle.length == 1) return true;
	//Skip wildcard
	else if (needle[needlePos] == '*') needlePos = 1;
	
	//Try to find a matching sequence
	for (var i = 0; i < haystack.length; i++)
	{
		//If start of matching sequence
		if (needle[needlePos] == haystack[i])
		{
			var offset = 1; //Then skip the item we already matched
			var matches = needlePos+1;
			
			for (var j = needlePos+1; j < needle.length; j++)
			{ //And then look at the remainder of the needle
				if (needle[j] == '*') //If it's a wildcard, recurse
				{
					//Remove stuff we already looked at
					var newNeedle = new Array();
					for (var x = j+1; x < needle.length; x++)
					{
						newNeedle.push(needle[x]);
					}
					var newHaystack = new Array();
					for (var y = i+offset; y < haystack.length; y++)
					{
						newHaystack.push(haystack[y]);
					}
					//See if the remainder matches
					if (__JSTSequenceCompare(newNeedle, newHaystack, true)) return true;
				}
				else //Not a wildcard
				{
					//If it's a match, iterate in loop
					if (needle[j] == haystack[i+offset])
					{
						matches++;
						offset++;
					}
					//If we're not currently skipping a wildcard then we've failed at this point
					// but we can move along the haystack
					else if (needlePos == 0) break;
				}
			}
			//If the number of matches is same as size of needle we've succeeded
			if (matches == needle.length) return true;
		}
	}
	//Default to failing
	return false;
}
Test Case:

Code: Select all

function TestOfSequenceChecker()
{
	this.haystack = new Array('x', 'y', 'z', 'a', 'e', 'e', 'f', 'e', 'b', 'c', 'g', 'i', 'z', 'x', 'j');
	
	this.testCanCompareStraightSequences = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array('x', 'y', 'z'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('e', 'f', 'e', 'b', 'c', 'g'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('x', 'y', 'z', 'a', 'e', 'e', 'f', 'e', 'b', 'c', 'g', 'i', 'z', 'x', 'j'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('b', 'c', 'g', 'i'), this.haystack));
	}
	
	this.testFailsWhenStraightSequenceIsWrong = function()
	{
		this.assertFalse(__JSTSequenceCompare(new Array('x', 'y', 'z', 'f'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('f', 'j'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('g', 'z', 'x'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('a', 'e', 'f'), this.haystack));
	}
	
	this.testCanIgnoreWildcards = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array('e', 'f', '*', '*', '*', 'g'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('x', '*', 'z'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('b', '*', '*', 'i'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('x', 'y', '*', 'a', 'e', 'e', 'f', '*', 'c', 'g', 'i', 'z', 'x', 'j'), this.haystack));
	}
	
	this.testFailsOnInvalidWilcardPosition = function()
	{
		this.assertFalse(__JSTSequenceCompare(new Array('x', '*', 'z', 'f'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('a', 'e', 'i', '*', 'j'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('e', 'g', '*', '*'), this.haystack));
		this.assertFalse(__JSTSequenceCompare(new Array('x', 'y', 'j', 'z', '*', '*', '*', 'f', '*', 'b', 'c', 'g', 'i', 'z', 'x'), this.haystack));
	}
	
	this.testReturnsTrueOnLoneWildcard = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array('*'), this.haystack));
	}
	
	this.testReturnsTrueForEmptyArray = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array(), this.haystack));
	}
	
	this.testReturnsTrueForWildCardsAtEndOfValidSequence = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array('x', 'y', 'z', '*'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('e', 'f', 'e', 'b', 'c', 'g', '*'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('x', 'y', 'z', 'a', 'e', 'e', 'f', 'e', 'b', 'c', 'g', 'i', 'z', 'x', 'j', '*'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('b', 'c', 'g', 'i', '*'), this.haystack));
	}
	
	this.testReturnsTrueWhenWildcardAtStartOfValidSequence = function()
	{
		this.assertTrue(__JSTSequenceCompare(new Array('*', 'x', 'y', 'z'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('*', 'e', 'f', 'e', 'b', 'c', 'g'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('*', 'x', 'y', 'z', 'a', 'e', 'e', 'f', 'e', 'b', 'c', 'g', 'i', 'z', 'x', 'j'), this.haystack));
		this.assertTrue(__JSTSequenceCompare(new Array('*', 'b', 'c', 'g', 'i'), this.haystack));
	}
}
It can probably be optimized a little bit but it's about half the number of lines I had it when it was initially not working 8O

Thanks Mordred :)