/*
 *                      Copyright (c) 2001
 *                         Megan Gentry
 *                      All Rights Reserved
 *               Commercial Distribution Prohibited
 *
 * This software may be freely copied and used in its entirety for any
 * purpose so long as the above copyright notice and these comments are
 * preserved in the source form of this software, and the binary
 * copyright is preserved in any image built from it.
 *
 * The author has used best efforts in the research, design, development
 * and testing of this software.  The author makes no warranty of any
 * kind, expressed or implied, with regard to this software and its
 * suitability for a given application.  The author shall not be liable
 * in any event for incidental or consequential damages in connection
 * with, or arising out of, the use or performance of this software.  Use
 * of this software constitutes acceptance of these terms.
 *
 * The author is committed to making a best effort at fixing any errors
 * found in the software and would welcome any reports of problems,
 * comments or suggestions regarding the software.  Please send email
 * to .
 */

/*
 * Abstract and Edit History
 *
 * dmextract
 *      This program was designed to read the contents of a DECmail-11
 *      .MAI file (an indexed file used on RSX and RSTS machines) and
 *      to extract the messages so that they can then be read using
 *      standard U*x mail utilities.  Input is from the specified file,
 *      while output is to standard output so that it can either be
 *      filtered or redirected to an output file.
 *
 * Edit History:
 *
 * (000) 07-Dec-2001 Megan Gentry
 *      Final coding so that the program dumps most of my message
 *      files.  The contents of the file it writes are still not
 *      properly processed by U*x mail(x) programs -- they concatenate
 *      some messages (even though all messages are written in
 *      exactly the same sequence... maybe there is a problem with
 *      header or message content).
 *
 * (001) 07-Dec-2001 Megan Gentry
 *      Okay, the 'From' line appears to need more than one field
 *      on the line for the message to be properly recognized.  A
 *      temporary workaround has been to output something else (like
 *      the date) for those 'From' lines with only one field.
 *      Now the problem is that some lines (notably those from
 *      VMS hosts) have names or witty sayings within quotes in
 *      the from field, which display improperly using U*x mail.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCK       (512)

/* Longest record payload that is kept; anything longer is truncated/resynced */
#define MAXREC      (255)

/*
 * A block marker is two little-endian 16-bit words: MARK_LEAD followed by
 * a word identifying the kind of block being opened or closed.
 */
#define MARK_LEAD   (01)
#define MARK_HEADER (0377)
#define MARK_TEXT   (0204)

char copyright[] = "dmextract.c, Copyright (c) 2001 by Megan Gentry";

typedef unsigned char   uchar;
typedef unsigned short  ushort;
typedef unsigned long   ulong;

int debug = 0;

/*
 * Message descriptor structure
 */
struct msginfo {
    struct msginfo *next;       /* Pointer to next record */
    struct msginfo *prev;       /* Pointer to previous record */
                                /* not really needed */
    ulong number;               /* Message number */
    struct {
        ulong spos;             /* File position for header start */
        ulong epos;             /* File position for header end */
        char from[256];         /* From record from header */
        char date[256];         /* Date record from header */
        char to[256];           /* To record from header */
        char cc[256];           /* Cc record from header */
        char subject[256];      /* Subj record from header */
        char mid[256];          /* For duplicate date (?) from header */
    } header;
    struct {
        ulong spos;             /* File position for text start */
        ulong epos;             /* File position for text end */
        ulong lines;            /* Count of lines of text */
    } text;
};

/*
 * Message descriptor list head structure
 */
struct msghead {
    struct msginfo *head;
    struct msginfo *tail;
    ulong count;
};

struct msghead msglist;         /* Head of message descriptor list */

char cuspname[132];             /* Execution name of this program */
char mailfile[132];             /* Name of mailfile to be referenced */

uchar buffer[BLOCK];            /* Scratch buffer for the current record */

/*
 * Read one little-endian 16-bit word from fp into *value.
 * Returns 1 on success, 0 if two bytes could not be read (in which case
 * *value is left untouched).
 */
static int
read_word(FILE *fp, int *value)
{
    uchar raw[2];

    if (fread(raw, 1, sizeof raw, fp) != sizeof raw)
        return 0;
    *value = (raw[1] << 8) | raw[0];
    return 1;
}

/*
 * Read one little-endian 16-bit word from fp and return it.
 * A short read (end of file or I/O error) yields 0 rather than
 * whatever happened to be on the stack.
 */
unsigned short
getushort(FILE *fp)
{
    int word = 0;

    (void) read_word(fp, &word);
    return (unsigned short) word;
}

/*
 * File position of the four-byte marker that has just been consumed.
 */
static ulong
marker_pos(FILE *fp)
{
    return (ulong) (ftell(fp) - 4);
}

/*
 * Pass 1: scan the whole file with a four-byte sliding window, find the
 * start and end of every message header and text block, and build the
 * global message descriptor list from them.  Exits the program if a
 * descriptor cannot be allocated.
 */
static void
scan_messages(FILE *fp)
{
    struct msginfo *msgp = NULL;
    int in_header = 0, in_text = 0;
    int hdrct = 0, txtct = 0;
    int c1, c2, c3, c4;

    memset(&msglist, 0, sizeof msglist);

    /* Preload the character pipe-line */
    c1 = getc(fp);
    c2 = getc(fp);
    c3 = getc(fp);

    while ((c4 = getc(fp)) != EOF) {
        /* Both marker kinds look like: MARK_LEAD, 0, <kind>, 0 */
        if (c1 == MARK_LEAD && c2 == 0 && c4 == 0) {
            if (c3 == MARK_HEADER) {
                if (!in_header) {
                    /* We've located the start of a message header */
                    in_header = 1;
                    hdrct++;
                    if (in_text) {
                        fprintf(stderr,
                            "warning (1): message header found while processing message %d text block\n",
                            txtct);
                        in_text = 0;
                    }

                    /* Allocate a new, zeroed message descriptor */
                    msgp = calloc(1, sizeof *msgp);
                    if (msgp == NULL) {
                        fprintf(stderr,
                            "fatal (1): unable to allocate memory for message %d\n",
                            hdrct);
                        exit(1);
                    }
                    msgp->number = (ulong) hdrct;
                    msgp->header.spos = marker_pos(fp);

                    /* Link the new descriptor into the list */
                    if (msglist.head != NULL) {
                        msgp->prev = msglist.tail;
                        msglist.tail->next = msgp;
                    } else {
                        msglist.head = msgp;
                    }
                    msglist.tail = msgp;

                    /* Keep track of number of messages */
                    msglist.count++;
                } else {
                    /* We've located the end of the message header */
                    msgp->header.epos = marker_pos(fp);
                    in_header = 0;
                }
            } else if (c3 == MARK_TEXT) {
                if (!in_text) {
                    /* We've located the beginning of the message */
                    in_text = 1;
                    txtct++;
                    if (msgp != NULL)
                        msgp->text.spos = marker_pos(fp);
                    else
                        fprintf(stderr,
                            "warning (1): text block found before any message header\n");
                } else {
                    /* We've located the end of the message */
                    if (msgp != NULL)
                        msgp->text.epos = marker_pos(fp);
                    in_text = 0;
                }
            }
        }

        /* The pipe-line shifts */
        c1 = c2;
        c2 = c3;
        c3 = c4;
    }

    /* Verify that we have the same number of headers as text blocks */
    if (hdrct != txtct)
        fprintf(stderr, "warning (1): %d headers, %d text blocks\n",
            hdrct, txtct);

    fprintf(stderr, "info (1): %s appears to contain %lu %s\n",
        mailfile, msglist.count,
        msglist.count == 1 ? "message" : "messages");
}

/*
 * Read the payload of a header record whose declared length is reclen
 * into the global buffer and NUL-terminate it.  The payload occupies
 * reclen bytes rounded up to an even count on disk; only as much as fits
 * in the buffer is read and the remainder is skipped, so an oversized
 * count can no longer overrun the buffer.
 */
static void
load_header_record(FILE *fp, int reclen)
{
    size_t padded = ((size_t) reclen + 1) & ~(size_t) 1;
    size_t want = padded < sizeof buffer ? padded : sizeof buffer;
    size_t got = fread(buffer, 1, want, fp);
    size_t end = reclen <= MAXREC ? (size_t) reclen : (size_t) MAXREC;

    /* Step over whatever part of the record did not fit */
    if (got == want && padded > want)
        fseek(fp, (long) (padded - want), SEEK_CUR);

    if (end > got)
        end = got;
    buffer[end] = 0;
}

/*
 * Copy the text of the header record currently in the global buffer
 * (everything after the one-byte field type) into dst.
 */
static void
copy_field(char *dst, size_t dstsize)
{
    snprintf(dst, dstsize, "%s", (const char *) &buffer[1]);
}

/*
 * Store the header record currently in the global buffer into the
 * matching field of msgp, keyed on the record's leading type byte.
 * Unknown field types are ignored.
 */
static void
store_header_field(struct msginfo *msgp)
{
    switch (buffer[0]) {
    case 0231:                  /* Undocumented field type */
                                /* Appears to duplicate date */
        copy_field(msgp->header.mid, sizeof msgp->header.mid);
        if (msgp->header.date[0] == '\0')
            snprintf(msgp->header.date, sizeof msgp->header.date,
                "%s", msgp->header.mid);
        break;
    case 0201:                  /* From: field */
        copy_field(msgp->header.from, sizeof msgp->header.from);
        break;
    case 0202:                  /* Date: field */
        copy_field(msgp->header.date, sizeof msgp->header.date);
        break;
    case 0205:                  /* To: field */
        copy_field(msgp->header.to, sizeof msgp->header.to);
        break;
    case 0206:                  /* Cc: field */
        copy_field(msgp->header.cc, sizeof msgp->header.cc);
        break;
    case 0207:                  /* Subject: field */
        copy_field(msgp->header.subject, sizeof msgp->header.subject);
        break;
    default:
        break;
    }
}

/*
 * Pass 2: extract message header information to fill each entry in the
 * message descriptor list.
 */
static void
read_headers(FILE *fp)
{
    struct msginfo *msgp;

    for (msgp = msglist.head; msgp != NULL; msgp = msgp->next) {
        fseek(fp, (long) msgp->header.spos + 4, SEEK_SET);
        while ((ulong) ftell(fp) < msgp->header.epos) {
            int reclen;

            /* A count of MARK_LEAD is the start of the closing marker */
            if (!read_word(fp, &reclen) || reclen == MARK_LEAD)
                break;
            load_header_record(fp, reclen);
            store_header_field(msgp);
        }
    }
}

/*
 * Read the payload of a text record into the global buffer and return
 * the number of bytes actually read.  reclen must be in 1..MAXREC.
 * Odd counts read reclen bytes and even counts read reclen-1 bytes;
 * this is the empirically derived rule described in the NOTE above
 * count_text_lines().
 */
static size_t
read_text_record(FILE *fp, int reclen)
{
    size_t want = (reclen & 01) ? (size_t) reclen : (size_t) reclen - 1;

    return fread(buffer, 1, want, fp);
}

/*
 * NOTE:
 *      It appears that different versions of DECmail did different
 *      things with regard to the text records.  I believe they
 *      are supposed to be counted records, two bytes count, with
 *      n bytes of text, padded with null bytes to account for
 *      odd counts.  The problem is that this is not what I found in
 *      practice in all cases.  And in some cases, the counts are
 *      fine up to some point, and then they are simply wrong.
 *      So, combining the algorithm I use for determining line length
 *      with a recovery algorithm when the next line's record count
 *      looks wrong, this seems to work best.  At the worst, I've
 *      noticed that sometimes, the final character of a line will
 *      be lost for all lines of a message, but not all lines of
 *      all messages in that same file.
 */

/*
 * Pass 3: process the counted records comprising each message to
 * 1) count the number of lines of text so we can report it, and
 * 2) check that processing runs exactly to the end of the text block,
 * reporting on stderr when it does not.
 */
static void
count_text_lines(FILE *fp)
{
    struct msginfo *msgp;

    for (msgp = msglist.head; msgp != NULL; msgp = msgp->next) {
        int linct = 0;

        fseek(fp, (long) msgp->text.spos + 4, SEEK_SET);
        while ((ulong) ftell(fp) < msgp->text.epos) {
            int reclen;

            if (!read_word(fp, &reclen))
                break;

            if (reclen > MAXREC) {
                /*
                 * Implausible count: back up one byte (a net advance of
                 * one) and try to resynchronise on the next word.
                 */
                fprintf(stderr,
                    "warning (3): Message %04lu, Line %04d, filpos 0%012lo, reclen %05d\n",
                    msgp->number, linct + 1,
                    (ulong) (ftell(fp) - 2), reclen);
                fprintf(stderr, "info (3): adjusting file position\n");
                fseek(fp, -1L, SEEK_CUR);
                continue;
            }
            linct++;

            if (reclen == 0)
                continue;

            (void) read_text_record(fp, reclen);
            msgp->text.lines++;
        }

        if ((ulong) ftell(fp) == msgp->text.epos)
            continue;

        fprintf(stderr, "error (3): While processing message %lu:\n",
            msgp->number);
        fprintf(stderr, "     Processing ended at file position 0%012lo\n",
            (ulong) ftell(fp));
        fprintf(stderr, "     Text block ends at file position  0%012lo\n",
            msgp->text.epos);
    }
}

/*
 * Write the header lines for one message to standard output, ending
 * with the blank line that separates header from text.
 */
static void
write_header(const struct msginfo *msgp)
{
    /*
     * Output the 'From' field.  If there is only one
     * field in the text, add the date as a second field
     * on the line so that U*x mail utilities can properly
     * identify starts of messages
     */
    if (strchr(msgp->header.from, ' ') != NULL)
        printf("From %s\n", msgp->header.from);
    else
        printf("From %s %s\n", msgp->header.from, msgp->header.date);

    printf("From: %s\n", msgp->header.from);

    /* There should always be a recipient specified */
    printf("To: %s\n", msgp->header.to);

    /* There doesn't always have to be a CC list */
    if (msgp->header.cc[0] != '\0')
        printf("Cc: %s\n", msgp->header.cc);

    /* There always has to be a date */
    printf("Date: %s\n", msgp->header.date);

    /* There doesn't always have to be a subject */
    if (msgp->header.subject[0] != '\0')
        printf("Subject: %s\n", msgp->header.subject);

    /* Let's assume the mail files were read at some point */
    printf("Status: RO\n");

    /* This is dmextract-specific info for debugging */
    printf("DECmail-Info: %s\n", msgp->header.mid);
    printf("DMextract-Number: %lu\n", msgp->number);
    printf("DMline-Count: %lu\n", msgp->text.lines);

    /* Separate header from text with a blank line */
    printf("\n");
}

/*
 * Write the text of one message to standard output, one output line per
 * text record, followed by a blank line.
 */
static void
write_text(FILE *fp, const struct msginfo *msgp)
{
    fseek(fp, (long) msgp->text.spos + 4, SEEK_SET);
    while ((ulong) ftell(fp) < msgp->text.epos) {
        int reclen;
        size_t got, limit, i;

        if (!read_word(fp, &reclen))
            break;
        if (reclen > MAXREC) {
            /* Same one-byte resynchronisation as the counting pass */
            fseek(fp, -1L, SEEK_CUR);
            continue;
        }
        if (reclen == 0)
            continue;

        /*
         * Terminate at what was really read so that no byte left over
         * from an earlier record can take part in the "From" test.
         */
        got = read_text_record(fp, reclen);
        buffer[got] = 0;

        /* Quote lines that would otherwise look like a message start */
        if (strncmp((const char *) buffer, "From", 4) == 0)
            putchar('>');

        /* The final byte of the record is never printed */
        limit = (size_t) reclen - 1;
        if (limit > got)
            limit = got;

        for (i = 0; i < limit; i++) {
            if (buffer[i] == 012)
                break;
            if (buffer[i] == 015) {
                /* A carriage return costs one extra byte from the file */
                (void) getc(fp);
                continue;
            }
            putchar(buffer[i]);
        }
        printf("\n");
    }
    printf("\n");
}

/*
 * Pass 4: output the information obtained from the headers, followed by
 * the associated message text.  Output is done in such a way that it can
 * be read by U*x standard mail utilities (hopefully).
 */
static void
write_messages(FILE *fp)
{
    const struct msginfo *msgp;

    for (msgp = msglist.head; msgp != NULL; msgp = msgp->next) {
        write_header(msgp);
        write_text(fp, msgp);
    }
}

/*
 * Release every descriptor on the global message list.
 */
static void
free_messages(void)
{
    struct msginfo *msgp = msglist.head;

    while (msgp != NULL) {
        struct msginfo *next = msgp->next;

        free(msgp);
        msgp = next;
    }
    memset(&msglist, 0, sizeof msglist);
}

/*
 * Store name in the global mailfile, appending ".mai" when the name
 * contains no '.' at all.  Returns 0 on success, or -1 (after printing
 * a diagnostic) if the resulting name does not fit.
 */
static int
set_mailfile(const char *name)
{
    static const char filetype[] = ".mai";
    int len = snprintf(mailfile, sizeof mailfile, "%s", name);

    if (len < 0 || (size_t) len >= sizeof mailfile) {
        fprintf(stderr, "%s: file name too long %s\n", cuspname, name);
        return -1;
    }

    /* In case file was specified without ".mai" filetype, append it */
    if (strrchr(mailfile, '.') == NULL) {
        if ((size_t) len + strlen(filetype) >= sizeof mailfile) {
            fprintf(stderr, "%s: file name too long %s\n", cuspname, name);
            return -1;
        }
        strcat(mailfile, filetype);
        fprintf(stderr, "warning (0): using file %s\n", mailfile);
    }
    return 0;
}

/*
 * usage: dmextract [mail_file]
 *
 * Reads mail_file (default "mail.mai") and writes every message it
 * contains to standard output; diagnostics go to standard error.
 * Returns 0 on success and 1 on a usage error, an over-long file name,
 * or a file that cannot be opened.
 */
int
main(int argc, char *argv[])
{
    FILE *fp;

    /* Save the cusp name (truncated if it does not fit) */
    snprintf(cuspname, sizeof cuspname, "%s",
        (argc > 0 && argv[0] != NULL) ? argv[0] : "dmextract");

    if (argc > 2) {
        fprintf(stderr, "usage: %s [mail_file]\n", cuspname);
        return 1;
    }

    /* Assume the mail.mai file unless one was named */
    if (set_mailfile(argc == 2 ? argv[1] : "mail.mai") != 0)
        return 1;

    /* open the mail file */
    if ((fp = fopen(mailfile, "rb")) == NULL) {
        fprintf(stderr, "%s: file not found %s\n", cuspname, mailfile);
        return 1;
    }

    scan_messages(fp);
    read_headers(fp);
    count_text_lines(fp);
    write_messages(fp);

    free_messages();
    fclose(fp);
    return 0;
}