💾 Archived View for spam.works › mirrors › textfiles › programming › russell.lst captured on 2023-06-16 at 20:12:05.

View Raw

More Information

-=-=-=-=-=-=-

A GENERIC HEAPSORT ALGORITHM IN C
by Stephen Russell

[LISTING ONE]

/*
 *  The Heapsort to sort an array of n integers.
 */

static
fixheap(h, i, n)
int *h;
unsigned i, n;
{
	unsigned k;
	int	 tmp;

	while ((k = 2 * i) <= n)		/* h[k] = left child of h[i] */
	{
		/*  Find maximum of left and right children */

		if (k != n && h[k+1] > h[k])
			++k;			/* right child is greater */

		/* Compare greater of children to parent */

		if (h[i] >= h[k])
			return;

		/* Parent is less than child, so swap */

		tmp = h[k]; h[k] = h[i]; h[i] = tmp;

		i = k;				/* move down tree */
	}
}


hsort(h, n)
int *h;
unsigned n;
{
	unsigned i;
	int	 tmp;

	--h;			/* adjust for zero-origin arrays in C */

	for (i = n/2; i > 1; --i)
		fixheap(h, i, n);	/* build heap, except for h[1] */

	while (n > 1)
	{
		fixheap(h, 1, n);	/* move max to h[1] */
		tmp = h[1];		/* move max to final position */
		h[1] = h[n];
		h[n] = tmp;
		--n;			/* reduce size of heap */
	}
}

[LISTING TWO]


/*
 *	Generic Heapsort, derived from listing 1.
 */


#define	H(k)	(h + k * size)

static
swap(p1, p2, n)			/* swap n bytes */
char *p1, *p2;
unsigned n;
{
	char	tmp;

	while (n-- != 0)
	{
		tmp = *p1; *p1++ = *p2; *p2++ = tmp;
	}
}

static
fixheap(h, size, cmp, i, n)
char *h;
unsigned size, i, n;
int (*cmp)();
{
	unsigned k;

	while ((k = 2 * i) <= n)
	{
		if (k != n && (*cmp)(H(k+1), H(k)) > 0)
			++k;

		if ((*cmp)(H(i), H(k)) >= 0)
			return;

		swap(H(i), H(k), size);
		i = k;
	}
}

hsort(h, n, size, cmp)
char *h;
unsigned n, size;
int (*cmp)();
{
	unsigned i;

	h -= size;

	for (i = n/2; i > 1; --i)
		fixheap(h, size, cmp, i, n);

	while (n > 1)
	{
		fixheap(h, size, cmp, 1, n);
		swap(H(1), H(n), size);
		--n;
	}
}


[LISTING THREE]

/*
 *  Generic Heapsort.
 *
 *  Synopsis:
 *	hsort(char *base, unsigned n, unsigned size, int (*fn)())
 *  Description:
 *	Hsort sorts the array of `n' items which starts at address `base'.
 *	The size of each item is as given.  Items are compared by the function
 *	`fn', which is passed pointers to two items as arguments. The function
 *	should return < 0 if item1 < item2, == 0 if item1 == item2, and > 0
 *	if item1 > item2.
 *  Version:
 *	1988 April 28
 *  Author:
 *	Stephen Russell, Department of Computer Science,
 *	University of Sydney, 2006
 *	Australia.
 */

#ifdef INLINE

#define	swap(p1, p2, n)	{\
	register char *_p1, *_p2;\
	register unsigned _n;\
	register char _tmp;\
\
	for (_p1 = p1, _p2 = p2, _n = n; _n-- > 0; )\
	{\
		_tmp = *_p1; *_p1++ = *_p2; *_p2++ = _tmp;\
	}\
}\

#else

/*
 *   Support routine for swapping elements of the array.
 */

static
swap(p1, p2, n)
register char	 *p1, *p2;
register unsigned n;
{
	register char	ctmp;

	/*
	 *  On machines with no alignment restrictions for int's,
	 *  the following loop may improve performance if moving lots
	 *  of data. It has been commented out for portability.

	 register int	itmp;

	 for ( ; n > sizeof(int); n -= sizeof(int))
	 {
		itmp = *(int *)p1;
		*(int *)p1 = *(int *)p2;
		p1 += sizeof(int);
		*(int *)p2 = itmp;
		p2 += sizeof(int);
	}

	*/

	while (n-- != 0)
	{
		ctmp = *p1; *p1++ = *p2; *p2++ = ctmp;
	}
}

#endif

/*
 *	To avoid function calls in the inner loops, the code responsible for
 *	constructing a heap from (part of) the array has been expanded inline.
 *	It is possible to convert this common code to a function. The three
 *	parameters base0, cmp and size are invariant - only the size of the
 *	gap and the high bound of the array change. In phase 1, gap decreases
 *	while hi is fixed. In phase 2, gap == size, and hi decreases. The
 *	variables p, q, and g are only used in this common code.
 */

hsort(base, n, size, cmp)
char *base;
unsigned n;
unsigned size;
int (*cmp)();
{
	register char	 *p, *q, *base0, *hi;
	register unsigned gap, g;

	if (n < 2)
		return;

	base0 = base - size;		/* set up address of h[0] */

	/*
	 *  The gap is the distance, in bytes, between h[0] and h[i],
	 *  for some i. It is also the distance between h[i] and h[2*i];
	 *  that is, the distance between a node and its left child.
	 *  The initial node of interest is h[n/2] (the rightmost
	 *  interior node), so gap is set accordingly. The following is
	 *  the only multiplication needed.
	 */

	gap = (n >> 1) * size; 		/* initial gap is n/2*size */
	hi = base0 + gap + gap; 	/* calculate address of h[n] */
	if (n & 1)
		hi += size;		/* watch out for odd arrays */

	/*
	 *  Phase 1: Construct heap from random data.
	 *
	 *  For i = n/2 downto 2, ensure h[i] is greater than its
	 *  children h[2*i] and h[2*i+1]. By decreasing 'gap' at each
	 *  iteration, we move down the heap towards h[2]. The final step
	 *  of making h[1] the maximum value is done in the next phase.
	 */

	for ( ; gap != size; gap -= size)
	{
		/*  fixheap(base0, size, cmp, gap, hi) */

		for (p = base0 + (g = gap); (q = p + g) <= hi; p = q)
		{
			g += g;		/* double gap for next level */

			/*
			 *  Find greater of left and right children.
			 */

			if (q != hi && (*cmp)(q + size, q) > 0)
			{
				q += size;	/* choose right child */
				g += size;	/* follow right subtree */
			}

			/*
			 *  Compare with parent.
			 */

			if ((*cmp)(p, q) >= 0)
				break;		/* order is correct */
	
			swap(p, q, size);	/* swap parent and child */
		}
	}

	/*
	 *  Phase 2: Each iteration makes the first item in the
	 *  array the maximum, then swaps it with the last item, which
	 *  is its correct position. The size of the heap is decreased
	 *  each iteration. The gap is always "size", as we are interested
	 *  in the heap starting at h[1].
	 */

	for ( ; hi != base; hi -= size)
	{
		/* fixheap(base0, size, cmp, gap (== size), hi) */

		p = base;		/* == base0 + size */
		for (g = size; (q = p + g) <= hi; p = q)
		{
			g += g;
			if (q != hi && (*cmp)(q + size, q) > 0)
			{
				q += size;
				g += size;
			}

			if ((*cmp)(p, q) >= 0)
				break;
	
			swap(p, q, size);
		}

		swap(base, hi, size);		/* move largest item to end */
	}
}

[LISTING FOUR]


/*
 *	Use hsort() to sort an array of strings read from input.
 */

#include	<stdio.h>


#define	MAXN	500
#define	MAXSTR	1000


cmp(p1, p2)
char **p1, **p2;
{
	return strcmp(*p1, *p2);
}

static	char	*string[MAXN];
static	char	buf[MAXSTR];

extern	char	*gets();
extern	char	*malloc();


main()
{
	char	*p;
	int	i, n;

	for (n = 0; gets(buf); ++n)
	{
		if (n == MAXN)
		{
			fprintf(stderr, "Too many strings\n");
			exit(1);
		}

		p = malloc(strlen(buf) + 1);
		if (p == (char *)NULL)
		{
			fprintf(stderr, "Out of memory\n");
			exit(2);
		}

		strcpy(string[n] = p, buf);
	}

	hsort(string, n, sizeof string[0], cmp);

	for (i = 0; i < n; ++i)
		puts(string[i]);

	exit(0);
}