I still stand by what I said in part 24 [1]:
Now, is this the fastest version possible? I'm not going to say yes this time. There might be something else that could be done to wring that last bit of performance out of this code, but at this point, I am definitely done with wringing out the speed.
That didn't prevent Dave Täht [2] from sending in a patch to part 24 that used mmap() (a system call that does some magic to make a file suddenly appear in memory) which did better on his system, although it was a percentage gain, not an order of magnitude gain.
>
```
/*************************************************************************
*
* Copyright 2012 by Sean Conner. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Comments, questions and criticisms can be sent to: sean@conman.org
*
*************************************************************************/
/* Style: C89, const correctness, assertive, system calls, full buffering */
/* lookup tables, mmap() */
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#define LINESIZE 16
/********************************************************************/
extern const char *sys_errlist[];
extern int sys_nerr;
static void do_dump (const int,const int);
static size_t do_dump_memory (unsigned char *,size_t,size_t,const int);
static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long);
static void hexout (char *const,unsigned long,size_t,const int);
static void myperror (const char *const);
static size_t myread (const int,char *,size_t);
static void mywrite (const int,const char *const,const size_t);
/********************************************************************/
int main(const int argc,const char *const argv[])
{
if (argc == 1)
do_dump(STDIN_FILENO,STDOUT_FILENO);
else
{
int i;
for (i = 1 ; i < argc ; i++)
{
int fhin;
fhin = open(argv[i],O_RDONLY);
if (fhin == -1)
{
myperror(argv[i]);
continue;
}
mywrite(STDOUT_FILENO,"-----",5);
mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
mywrite(STDOUT_FILENO,"-----\n",6);
do_dump(fhin,STDOUT_FILENO);
if (close(fhin) < 0)
myperror(argv[i]);
}
}
return EXIT_SUCCESS;
}
/************************************************************************/
static void do_dump(const int fhin,const int fhout)
{
struct stat info;
assert(fhin >= 0);
assert(fhout >= 0);
if (fstat(fhin,&info) < 0)
myperror("fstat()");
if (!S_ISREG(info.st_mode))
{
unsigned char buffer[4096];
size_t bytes;
size_t off = 0;
while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0)
off = do_dump_memory(buffer,bytes,off,fhout);
}
else
{
unsigned char *buffer;
buffer = mmap(NULL,info.st_size,PROT_READ,MAP_SHARED,fhin,0);
if (buffer == MAP_FAILED)
myperror("mmap()");
if (madvise(buffer,info.st_size,MADV_SEQUENTIAL | MADV_WILLNEED) < 0)
myperror("madvise()");
do_dump_memory(buffer,info.st_size,0,fhout);
munmap(buffer,info.st_size);
}
}
/********************************************************************/
static size_t do_dump_memory(
unsigned char *p,
size_t bytes,
size_t off,
const int fhout
)
{
char outbuffer[75 * 109];
char *pout;
size_t count;
assert(p != NULL);
assert(fhout >= 0);
memset(outbuffer,' ',sizeof(outbuffer));
count = 0;
pout = outbuffer;
while(bytes)
{
size_t amount;
amount = dump_line(&pout,p,bytes,off);
p += amount;
bytes -= amount;
off += amount;
count++;
if (count == 109)
{
mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
memset(outbuffer,' ',sizeof(outbuffer));
count = 0;
pout = outbuffer;
}
}
if ((size_t)(pout - outbuffer) > 0)
mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
return off;
}
/******************************************************************/
static size_t dump_line(
char **const pline,
unsigned char *p,
size_t bytes,
const unsigned long off
)
{
char *line;
char *dh;
char *da;
size_t count;
assert(pline != NULL);
assert(*pline != NULL);
assert(p != NULL);
assert(bytes > 0);
line = *pline;
hexout(line,off,8,':');
if (bytes > LINESIZE)
bytes = LINESIZE;
p += bytes;
dh = &line[10 + bytes * 3];
da = &line[58 + bytes];
for (count = 0 ; count < bytes ; count++)
{
p --;
da --;
dh -= 3;
*da = "................................ !\"#$%&'()*+,-./0123456789:;<=>?"
"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~."
"................................................................"
"........................................................"
"........"[*p];
hexout(dh,(unsigned long)*p,2,' ');
}
line[58 + count] = '\n';
*pline = &line[59 + count];
return count;
}
/**********************************************************************/
static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
{
assert(dest != NULL);
assert(size > 0);
assert((padding >= ' ') && (padding <= '~'));
dest[size] = padding;
while(size--)
{
dest[size] = "0123456789ABCDEF"[value & 0x0f];
value >>= 4;
}
}
/************************************************************************/
static void myperror(const char *const s)
{
int err = errno;
assert(s != NULL);
mywrite(STDERR_FILENO,s,strlen(s));
mywrite(STDERR_FILENO,": ",2);
if (err > sys_nerr)
mywrite(STDERR_FILENO,"(unknown)",9);
else
mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
mywrite(STDERR_FILENO,"\n",1);
}
/************************************************************************/
static size_t myread(const int fh,char *buf,size_t size)
{
size_t amount = 0;
assert(fh >= 0);
assert(buf != NULL);
assert(size > 0);
while(size > 0)
{
ssize_t bytes;
bytes = read(fh,buf,size);
if (bytes < 0)
{
myperror("read()");
exit(EXIT_FAILURE);
}
if (bytes == 0)
break;
amount += bytes;
size -= bytes;
buf += bytes;
}
return amount;
}
/*********************************************************************/
static void mywrite(const int fh,const char *const msg,const size_t size)
{
assert(fh >= 0);
assert(msg != NULL);
assert(size > 0);
if (write(fh,msg,size) < (ssize_t)size)
{
if (fh != STDERR_FILENO)
myperror("output");
exit(EXIT_FAILURE);
}
}
/***********************************************************************/
```
I tried it on my system, and saw no difference in performance whatsoever. But Dave was using a 64-bit system, and I was using a 32-bit system. Okay, there could be a difference there. I then tried it on a 64-bit system (The Corporation provided laptop, running a 64-bit version of Linux) and there was a difference, but well:
>
```
[spc]saltmine:~/source/99>time ./24 libxul.so >/dev/null
real 0m0.043s
user 0m0.030s
sys 0m0.010s
[spc]saltmine:~/source/99>time ./26 libxul.so >/dev/null
real 0m0.054s
user 0m0.040s
sys 0m0.010s
```
The version with mmap() is slower! It's more noticeable with a large (759,012,536 bytes) file:
>
```
[spc]saltmine:~/source/99>time ./24 largedata >/dev/null
real 0m1.682s
user 0m1.500s
sys 0m0.170s
[spc]saltmine:~/source/99>time ./26 largedata >/dev/null
real 0m1.809s
user 0m1.680s
sys 0m0.120s
```
Yes, time spent in the kernel goes down (understandable, since we no longer have to copy data out of the file through the kernel) but the overall time goes up. And at this point, we've reached the point of diminishing returns [3], where the amount of return does not justify the amount of effort. It could be that Dave's machine had more memory than my machine, or a faster harddrive, or a later version of the kernel that handled mmap()/madvise() better. There's no real gains at this point to be had.
Another interesting thing is to run the time command, but with more verbose output:
>
```
[spc]saltmine:~/source/99>time -v ./24 largedata >/dev/null
Command being timed: "./24 largedata"
User time (seconds): 1.47
System time (seconds): 0.22
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.69
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1600
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 139
Voluntary context switches: 1
Involuntary context switches: 171
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
```
Here we see the code from part 24 taking 139 page faults. Now, today's version:
>
```
[spc]saltmine:~/source/99>time -v ./26 largedata >/dev/null
Command being timed: "./26 largedata"
User time (seconds): 1.71
System time (seconds): 0.10
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.82
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 2965440
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 185449
Voluntary context switches: 1
Involuntary context switches: 182
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
```
The number of page faults skyrockets to 185,449, three orders of magnitude more than the previous version, and thus, that could account for the time loss on my 64-bit system (quite possibly this does show a sub-optimal implementation of mmap()).
Your milage may vary, though.
[2] http://the-edge.blogspot.com/