June 30, 2012

Searching for a position of set bits in a variable

In a deamon that I have created in Badoo I need to search for a positions of a bits in a 32 bit variable. The simplest solution is to look at each bit, but there is a faster solution using GCC built in function __builtin_ctzl:

Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.

So I wrote test to compare speed of these two approaches:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

#define HOWMUCH 100000000
#define RANDOMBITS 4

static uint64_t stats[32];
static uint64_t stats2[32];
static uint32_t flags[HOWMUCH];

int main()
{
	time_t start;

	/* generate flags */

	printf("generating flags... ");

	start = time(NULL);

	for (uint64_t i = 0; i < HOWMUCH; i++) {
		/* create random flag */
		for (uint32_t j = 0; j < RANDOMBITS; j++) {
			flags[i] |= 1 << (random() % 32);
		}
	}

	printf("%ld s\n", time(NULL) - start);

	printf("using cycle... ");

	start = time(NULL);

	for (uint64_t i = 0; i < HOWMUCH; i++) {
		for (int x = 0; x < 32; x++) {
			if (flags[i] & (1 << x)) {
				stats[x]++;
			}
		}
	}

	printf("%ld s\n", time(NULL) - start);

	for (int x = 0; x < 32; x++) {
		printf("%ld ", stats[x]);
	}
	printf("\n");

	printf("using builtin... ");

	start = time(NULL);

	for (uint64_t i = 0; i < HOWMUCH; i++) {
		uint32_t flag = flags[i];
		int offset = 0;

		while (flag > 0) {
			int zeroes = __builtin_ctzl(flag);
			int moveby = zeroes+1;

			stats2[offset + zeroes]++;

			if (moveby == 32) {
				flag = 0;
			} else {
				flag >>= moveby;
				offset += moveby;
			}
		}
	}

	printf("%ld s\n", time(NULL) - start);

	for (int x = 0; x < 32; x++) {
		printf("%ld ", stats2[x]);
	}
	printf("\n");

	for (int x = 0; x < 32; x++) {
		if (stats[x] != stats2[x]) {
			printf("%i bit NOT SAME (%ld != %ld)\n", x, stats[x], stats2[x]);
			return 0;
		}
	}
	printf("SAME\n");


	return 0;
}

Here is the result of testing on my VirtualBox:

marko@marko-virtual ~/badoo/temp $ gcc builtin_test.c -o test -O3 --std=gnu99
marko@marko-virtual ~/badoo/temp $ ./test
generating flags... 5 s
using cycle... 8 s
11929711 11926960 11926934 11925219 11928833 11923393 11923177
11924621 11924564 11928647 11921652 11924644 11926105 11929288
11929690 11924292 11923555 11922419 11925611 11924387 11928793
11926351 11928699 11927228 11922918 11921752 11927864 11926735
11927322 11923573 11932816 11930695
using builtin... 1 s
11929711 11926960 11926934 11925219 11928833 11923393 11923177
11924621 11924564 11928647 11921652 11924644 11926105 11929288
11929690 11924292 11923555 11922419 11925611 11924387 11928793
11926351 11928699 11927228 11922918 11921752 11927864 11926735
11927322 11923573 11932816 11930695
SAME
marko@marko-virtual ~/badoo/temp $ gcc builtin_test.c -o test -O0 -g3
--std=gnu99
marko@marko-virtual ~/badoo/temp $ gcc builtin_test.c -o test -O0 --std=gnu99
marko@marko-virtual ~/badoo/temp $ ./test
generating flags... 5 s
using cycle... 14 s
11929711 11926960 11926934 11925219 11928833 11923393 11923177
11924621 11924564 11928647 11921652 11924644 11926105 11929288
11929690 11924292 11923555 11922419 11925611 11924387 11928793
11926351 11928699 11927228 11922918 11921752 11927864 11926735
11927322 11923573 11932816 11930695
using builtin... 3 s
11929711 11926960 11926934 11925219 11928833 11923393 11923177
11924621 11924564 11928647 11921652 11924644 11926105 11929288
11929690 11924292 11923555 11922419 11925611 11924387 11928793
11926351 11928699 11927228 11922918 11921752 11927864 11926735
11927322 11923573 11932816 11930695
SAME

While writing test, I have stumbled upon issue with bit shifting. It happened that if you right shift by the size of a variable, you get not 0, but same number. So I have additional if statement for case when I have to shift by 32.