💾 Archived View for spam.works › mirrors › textfiles › computers › regan.lst captured on 2023-06-14 at 16:04:56.
-=-=-=-=-=-=-
_LZW REVISITED_ by Shawn M. Regan [LISTING ONE] /* Basic LZW Data Compression program published in DDJ October 1989 issue. * Original Author: Mark R. Nelson * Updated by: Shawn M. Regan, January 1990 * Added: - Method to clear table when compression ratio degrades * - Self adjusting code size capability (up to 14 bits) * Updated functions are marked with "MODIFIED". main() has been updated also * Compile with -ml (large model) for MAX_BITS == 14 only */ #include <stdio.h> #define INIT_BITS 9 #define MAX_BITS 14 /* Do not exceed 14 with this program */ #define HASHING_SHIFT MAX_BITS - 8 #if MAX_BITS == 14 /* Set the table size. Must be a prime */ #define TABLE_SIZE 18041 /* number somewhat larger than 2^MAX_BITS.*/ #elif MAX_BITS == 13 #define TABLE_SIZE 9029 #else #define TABLE_SIZE 5021 #endif #define CLEAR_TABLE 256 /* Code to flush the string table */ #define TERMINATOR 257 /* To mark EOF Condition, instead of MAX_VALUE */ #define FIRST_CODE 258 /* First available code for code_value table */ #define CHECK_TIME 100 /* Check comp ratio every CHECK_TIME chars input */ #define MAXVAL(n) (( 1 <<( n )) -1) /* max_value formula macro */ unsigned input_code(); void *malloc(); int *code_value; /* This is the code value array */ unsigned int *prefix_code; /* This array holds the prefix codes */ unsigned char *append_character; /* This array holds the appended chars */ unsigned char decode_stack[4000]; /* This array holds the decoded string */ int num_bits=INIT_BITS; /* Starting with 9 bit codes */ unsigned long bytes_in=0,bytes_out=0; /* Used to monitor compression ratio */ int max_code; /* old MAX_CODE */ unsigned long checkpoint=CHECK_TIME; /* For compression ratio monitoring */ main(int argc, char *argv[]) { FILE *input_file, *output_file, *lzw_file; char input_file_name[81]; /* The three buffers for the compression phase. */ code_value=malloc(TABLE_SIZE*sizeof(unsigned int)); prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int)); append_character=malloc(TABLE_SIZE*sizeof(unsigned char)); if (code_value==NULL || prefix_code==NULL || append_character==NULL) { printf("Error allocating table space!\n"); exit(1); } /* Get the file name, open it, and open the LZW output file. */ if (argc>1) strcpy(input_file_name,argv[1]); else { printf("Input file name: "); scanf("%s",input_file_name); } input_file=fopen(input_file_name,"rb"); lzw_file=fopen("test.lzw","wb"); if (input_file == NULL || lzw_file == NULL) { printf("Error opening files\n"); exit(1); } max_code = MAXVAL(num_bits); /* Initialize max_value & max_code */ compress(input_file,lzw_file); /* Call compression routine */ fclose(input_file); fclose(lzw_file); free(code_value); /* Needed only for compression */ lzw_file=fopen("test.lzw","rb"); output_file=fopen("test.out","wb"); if (lzw_file == NULL || output_file == NULL) { printf("Error opening files\n"); exit(1); } num_bits=INIT_BITS; /* Re-initialize for expansion */ max_code = MAXVAL(num_bits); expand(lzw_file,output_file); /* Call expansion routine */ fclose(lzw_file); /* Clean it all up */ fclose(output_file); free(prefix_code); free(append_character); } /* MODIFIED This is the new compression routine. The first two 9-bit codes * have been reserved for communication between the compressor and expander. */ compress(FILE *input, FILE *output) { unsigned int next_code=FIRST_CODE; unsigned int character; unsigned int string_code; unsigned int index; int i, /* All purpose integer */ ratio_new, /* New compression ratio as a percentage */ ratio_old=100; /* Original ratio at 100% */ for (i=0;i<TABLE_SIZE;i++) /* Initialize the string table first */ code_value[i]=-1; printf("Compressing\n"); string_code=getc(input); /* Get the first code */ /* This is the main compression loop. Notice when the table is full we try * to increment the code size. Only when num_bits == MAX_BITS and the code * value table is full do we start to monitor the compression ratio. */ while((character=getc(input)) != (unsigned)EOF) { if (!(++bytes_in % 1000)) { /* Count input bytes and pacifier */ putchar('.'); } index=find_match(string_code,character); if (code_value[index] != -1) string_code=code_value[index]; else { if (next_code <= max_code ) { code_value[index]=next_code++; prefix_code[index]=string_code; append_character[index]=character; } output_code(output,string_code); /* Send out current code */ string_code=character; if (next_code > max_code) { /* Is table Full? */ if ( num_bits < MAX_BITS) { /* Any more bits? */ putchar('+'); max_code = MAXVAL(++num_bits); /* Increment code size then */ } else if (bytes_in > checkpoint) { /* At checkpoint? */ if (num_bits == MAX_BITS ) { ratio_new = bytes_out*100/bytes_in; /* New compression ratio */ if (ratio_new > ratio_old) { /* Has ratio degraded? */ output_code(output,CLEAR_TABLE); /* YES,flush string table */ putchar('C'); num_bits=INIT_BITS; next_code=FIRST_CODE; /* Reset to FIRST_CODE */ max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */ bytes_in = bytes_out = 0; ratio_old=100; /* Reset compression ratio */ for (i=0;i<TABLE_SIZE;i++) /* Reset code value array */ code_value[i]=-1; } else /* NO, then save new */ ratio_old = ratio_new; /* compression ratio */ } checkpoint = bytes_in + CHECK_TIME; /* Set new checkpoint */ } } } } output_code(output,string_code); /* Output the last code */ if (next_code == max_code) { /* Handles special case for bit */ ++num_bits; /* increment on EOF */ putchar('+'); } output_code(output,TERMINATOR); /* Output the end of buffer code */ output_code(output,0); /* Flush the output buffer */ output_code(output,0); output_code(output,0); putchar('\n'); } /* UNCHANGED from original * This is the hashing routine. */ find_match(int hash_prefix, unsigned int hash_character) { int index, offset; index = (hash_character << HASHING_SHIFT ) ^ hash_prefix; if (index == 0 ) offset=1; else offset = TABLE_SIZE - index; while(1) { if (code_value[index] == -1 ) return(index); if (prefix_code[index] == hash_prefix && append_character[index] == hash_character) return(index); index -= offset; if (index < 0) index += TABLE_SIZE; } } /* MODIFIED This is the modified expansion routine. It must now check for the * CLEAR_TABLE code and know when to increment the code size. */ expand(FILE *input, FILE *output) { unsigned int next_code=FIRST_CODE; unsigned int new_code; unsigned int old_code; int character, counter=0, clear_flag=1; /* Need to clear the code value array */ unsigned char *string; char *decode_string(unsigned char *buffer, unsigned int code); printf("Expanding\n"); while((new_code=input_code(input)) != TERMINATOR) { if (clear_flag) { /* Initialize or Re-Initialize */ clear_flag=0; old_code=new_code; /* The next three lines have been moved */ character=old_code; /* from the original */ putc(old_code,output); continue; } if (new_code == CLEAR_TABLE) { /* Clear string table */ clear_flag=1; num_bits=INIT_BITS; next_code=FIRST_CODE; putchar('C'); max_code = MAXVAL(num_bits); continue; } if (++counter == 1000) { /* Pacifier */ counter=0; putchar('.'); } if (new_code >= next_code) { /* Check for string+char+string */ *decode_stack=character; string=decode_string(decode_stack+1,old_code); } else string=decode_string(decode_stack,new_code); character = *string; /* Output decoded string in reverse */ while (string >= decode_stack) putc(*string--,output); if (next_code <= max_code) { /* Add to string table if not full */ prefix_code[next_code]=old_code; append_character[next_code++]=character; if (next_code == max_code && num_bits < MAX_BITS) { putchar('+'); max_code = MAXVAL(++num_bits); } } old_code=new_code; } putchar('\n'); } /* UNCHANGED from original * Decode a string from the string table, storing it in a buffer. * The buffer can then be output in reverse order by the expansion * program. */ char *decode_string(unsigned char *buffer, unsigned int code) { int i=0; while(code > 255 ) { *buffer++ = append_character[code]; code=prefix_code[code]; if (i++ >= 4000 ) { printf("Error during code expansion\n"); exit(1); } } *buffer=code; return(buffer); } /* UNCHANGED from original * Input a variable length code. */ unsigned input_code(FILE *input) { unsigned int return_value; static int input_bit_count=0; static unsigned long input_bit_buffer=0L; while (input_bit_count <= 24 ) { input_bit_buffer |= (unsigned long) getc(input) << (24 - input_bit_count); input_bit_count += 8; } return_value=input_bit_buffer >> (32-num_bits); input_bit_buffer <<= num_bits; input_bit_count -= num_bits; return(return_value); } /* MODIFIED Output a variable length code. */ output_code(FILE *output, unsigned int code) { static int output_bit_count=0; static unsigned long output_bit_buffer=0L; output_bit_buffer |= (unsigned long) code << (32 - num_bits - output_bit_count); output_bit_count += num_bits; while (output_bit_count >= 8) { putc(output_bit_buffer >> 24, output); output_bit_buffer <<= 8; output_bit_count -= 8; bytes_out++; /* ADDED for compression monitoring */ } }