99 ways to program a hex, Part 22: C89, const correctness, assertive, system calls, full buffering

So yesterday [1] I presented a non-portable version that was quite a bit faster than the portable version, but I'm not quite done yet. That version just buffered a line at a time—today's version buffers nearly 8k (Kilobyte) worth of data (it's not exact, but it's close enough) between calls to write().

>
```
/*************************************************************************
*
* Copyright 2012 by Sean Conner. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Comments, questions and criticisms can be sent to: sean@conman.org
*
*************************************************************************/
/* Style: C89, const correctness, assertive, system calls, full buffering */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#define LINESIZE 16
/********************************************************************/
extern const char *sys_errlist[];
extern int sys_nerr;
static void do_dump (const int,const int);
static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long);
static void hexout (char *const,unsigned long,size_t,const int);
static void myperror (const char *const);
static size_t myread (const int,char *,size_t);
static void mywrite (const int,const char *const,const size_t);
/********************************************************************/
int main(const int argc,const char *const argv[])
{
if (argc == 1)
do_dump(STDIN_FILENO,STDOUT_FILENO);
else
{
int i;
for (i = 1 ; i < argc ; i++)
{
int fhin;
fhin = open(argv[i],O_RDONLY);
if (fhin == -1)
{
myperror(argv[i]);
continue;
}
mywrite(STDOUT_FILENO,"-----",5);
mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
mywrite(STDOUT_FILENO,"-----\n",6);
do_dump(fhin,STDOUT_FILENO);
if (close(fhin) < 0)
myperror(argv[i]);
}
}
return EXIT_SUCCESS;
}
/************************************************************************/
static void do_dump(const int fhin,const int fhout)
{
unsigned char buffer[4096];
char outbuffer[75 * 109];
char *pout;
unsigned long off;
size_t bytes;
size_t count;
assert(fhin >= 0);
assert(fhout >= 0);
memset(outbuffer,' ',sizeof(outbuffer));
off = 0;
count = 0;
pout = outbuffer;
while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0)
{
unsigned char *p = buffer;
for (p = buffer ; bytes > 0 ; )
{
size_t amount;
amount = dump_line(&pout,p,bytes,off);
p += amount;
bytes -= amount;
off += amount;
count++;
if (count == 109)
{
mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
memset(outbuffer,' ',sizeof(outbuffer));
count = 0;
pout = outbuffer;
}
}
}
if ((size_t)(pout - outbuffer) > 0)
mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
}
/********************************************************************/
static size_t dump_line(
char **const pline,
unsigned char *p,
size_t bytes,
const unsigned long off
)
{
char *line;
char *dh;
char *da;
size_t count;
assert(pline != NULL);
assert(*pline != NULL);
assert(p != NULL);
assert(bytes > 0);
line = *pline;
hexout(line,off,8,':');
if (bytes > LINESIZE)
bytes = LINESIZE;
p += bytes;
dh = &line[10 + bytes * 3];
da = &line[58 + bytes];
for (count = 0 ; count < bytes ; count++)
{
p --;
da --;
dh -= 3;
if ((*p >= ' ') && (*p <= '~'))
*da = *p;
else
*da = '.';
hexout(dh,(unsigned long)*p,2,' ');
}
line[58 + count] = '\n';
*pline = &line[59 + count];
return count;
}
/**********************************************************************/
static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
{
assert(dest != NULL);
assert(size > 0);
assert((padding >= ' ') && (padding <= '~'));
dest[size] = padding;
while(size--)
{
dest[size] = (char)((value & 0x0F) + '0');
if (dest[size] > '9') dest[size] += 7;
value >>= 4;
}
}
/************************************************************************/
static void myperror(const char *const s)
{
int err = errno;
assert(s != NULL);
mywrite(STDERR_FILENO,s,strlen(s));
mywrite(STDERR_FILENO,": ",2);
if (err > sys_nerr)
mywrite(STDERR_FILENO,"(unknown)",9);
else
mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
mywrite(STDERR_FILENO,"\n",1);
}
/************************************************************************/
static size_t myread(const int fh,char *buf,size_t size)
{
size_t amount = 0;
assert(fh >= 0);
assert(buf != NULL);
assert(size > 0);
while(size > 0)
{
ssize_t bytes;
bytes = read(fh,buf,size);
if (bytes < 0)
{
myperror("read()");
exit(EXIT_FAILURE);
}
if (bytes == 0)
break;
amount += bytes;
size -= bytes;
buf += bytes;
}
return amount;
}
/*********************************************************************/
static void mywrite(const int fh,const char *const msg,const size_t size)
{
assert(fh >= 0);
assert(msg != NULL);
assert(size > 0);
if (write(fh,msg,size) < (ssize_t)size)
{
if (fh != STDERR_FILENO)
myperror("output");
exit(EXIT_FAILURE);
}
}
/***********************************************************************/
```

And that makes all the difference. The portable vesrsion [2]:

>
```
[spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null
real 0m4.985s
user 0m4.969s
sys 0m0.015s
```

Our first stab at a non-portable, but possibly faster version [3]:

>
```
[spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null
real 0m2.936s
user 0m1.511s
sys 0m1.425s
```

The “it's quite a bit faster” version [4]:

>
```
[spc]lucy:~/projects/99/src>time ./21 ~/bin/firefox/libxul.so >/dev/null
real 0m0.957s
user 0m0.645s
sys 0m0.313s
```

And finally, the punchline—today's version:

>
```
[spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
real 0m0.460s
user 0m0.448s
sys 0m0.012s
```

And yes, that's the real output—1/10 the time of the portable version with a similar amount of time in the kernel.

Frankly, I was a bit surprised at these results—not that the non-portable version was faster (that's almost a given) but the magnitude of the results. I didn't think the standard C library had that much overhead. I was expecting easily a percentage increase in speed, but even twice would have been unexpected, but ten times faster?

Wow.

Increasing the size of the buffer past what I have probably won't help all that much, and in fact, when I doubled the buffer size:

>
```
[spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
real 0m0.592s
user 0m0.582s
sys 0m0.010s
```

The timing difference could be due to cache effects, maybe?

So I think we've maxed out the speed at which this program will run. As a test, I profiled the code to see if there was anything I migh have missed:

>
```
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
100.21 0.41 0.41 1 410.86 410.86 do_dump
0.00 0.41 0.00 9197 0.00 0.00 mywrite
```

I checked the code GCC [5] produced (all code was compiled with -O3, a very high level of optimization) and well, I'm not sure I could have done much better, and probably would have done worse—GCC inlined everything into do_dump() (with the exception of main() and mywrite()), something I would not have done in assembly (and have any hope of code reuse for another project). So I think we're done with making this code fast.

That's not to say I won't do an assembly version of this program, but it probably won't be for the x86 line.

[1] /boston/2012/01/29.1

[2] /boston/2012/01/20.2

[3] /boston/2012/01/28.1

[4] /boston/2012/01/29.1

[5] http://gcc.gnu.org/

[6] /boston/2012/01/29.1

[7] /boston/2012/01/31.2

Gemini Mention this post

Contact the author