/*
 * pars_data_8bits.cpp
 *
 *  Created on: Jan 30, 2014
 *      Author: richer
 */

#include <emmintrin.h>
#include <xmmintrin.h>

/**
 * Element-wise parsimony combine of two residue arrays.
 *
 * For every index i in [0, size): z[i] receives x[i] & y[i] when that
 * intersection is non-zero; otherwise z[i] receives x[i] | y[i] and the
 * change counter is incremented.
 *
 * The first (size & ~15) elements are processed 16 at a time with SSE2; the
 * remaining (at most 15) elements are handled by a scalar loop.
 *
 * NOTE(review): the vector path treats 16 consecutive elements as 16 bytes,
 * i.e. it assumes sizeof(ResidueType) == 1 -- confirm against the typedef.
 *
 * Unaligned loads/stores are used, so x, y and z need no particular
 * alignment. z may be the same array as x or y (in-place update): in both
 * the vector and the scalar path every input element is read before the
 * corresponding output element is written.
 *
 * @param x     first input array, at least size elements
 * @param y     second input array, at least size elements
 * @param z     output array, at least size elements
 * @param size  number of elements to process; values <= 0 process nothing
 * @return      number of positions where x[i] & y[i] was zero
 */
int parsimony_sse2_intrinsics(ResidueType *x, ResidueType *y, ResidueType *z, int size) {
	int i, changes = 0;
	/* Largest multiple of 16 that does not exceed size. */
	const int vec_end = size & ~15;
	const __m128i zero = _mm_setzero_si128();

	for (i = 0; i < vec_end; i += 16) {
		const __m128i vx = _mm_loadu_si128((const __m128i *) &x[i]);
		const __m128i vy = _mm_loadu_si128((const __m128i *) &y[i]);
		const __m128i both = _mm_and_si128(vx, vy);
		const __m128i either = _mm_or_si128(vx, vy);
		/* Lane is 0xFF where the intersection is empty, 0x00 elsewhere. */
		const __m128i is_empty = _mm_cmpeq_epi8(zero, both);
		/* One bit per byte lane; each set bit is one change. */
		const unsigned int mask = (unsigned int) _mm_movemask_epi8(is_empty);

#ifdef CPU_POPCNT_COMPLIANT
		changes += _mm_popcnt_u32(mask);
#else
		changes += __builtin_popcount(mask);
#endif
		/*
		 * 'both' is already zero in every lane flagged by is_empty, so
		 * OR-ing in the masked union yields: union where the intersection
		 * is empty, intersection everywhere else.
		 */
		_mm_storeu_si128((__m128i *) &z[i],
				_mm_or_si128(both, _mm_and_si128(is_empty, either)));
	}

	/* Scalar tail for the last (size % 16) elements. */
	for ( ; i < size; ++i) {
		/* Read both inputs first so the result is correct when z aliases x or y. */
		const ResidueType a = x[i];
		const ResidueType b = y[i];
		ResidueType r = a & b;

		if (r == 0) {
			r = a | b;
			++changes;
		}
		z[i] = r;
	}

	return changes;
}

