23-Nov-85 07:10:23-MST,9009;000000000001 Return-Path: Received: from BRL-TGR.ARPA by SIMTEL20.ARPA with TCP; Sat 23 Nov 85 07:10:05-MST Received: from usenet by TGR.BRL.ARPA id a004133; 23 Nov 85 8:40 EST From: Charlie Perkins Newsgroups: net.sources Subject: Retrieving data from broken UUCP transfers Message-ID: <300@polaris.UUCP> Date: 22 Nov 85 02:18:42 GMT To: unix-sources@BRL-TGR.ARPA ========= Many, many times I have discovered TM files remaining in our uucp spool directory resulting from incomplete uucp sessions. Moreover, a significant minority of those files contained information that was never retransmitted, for reasons that I cannot understand. I am a fanatic about not losing data, so I wrote the following programs to make maximum use of the incomplete TM files. This has the effect of sometimes duplicating mail that WAS eventually retransmitted correctly, but my users don't mind that at all. Duplicate news articles are later recognized as such. When all the articles are retrieved from the TM files, they can be moved to the junk directory and processed using "process_junk" which I have posted separately. The files are deTM, deTM_unbatch.c, and mv2junk. deTM does not automatically invoke mv2junk so that it can work without having to lock the news files. mv2junk just has the effect of transferring deTM's results into the junk news directory so that process_junk can get to them. I would recommend at least reading the comments in deTM before trying to run it. deTM_unbatch is a C program; it was too crappy to do as a shell script. I would be willing to install #define's for portability if anybody wants to contribute them. deTM_unbatch has the effect of breaking down a batched news file into its constituent articles.
============================================================ deTM ============================================================ SPOOL=/usr/spool/news LIB=/usr/local/lib/news ACTIVE=$LIB/active LOG=$LIB/log JUNK=$SPOOL/junk NEWSBIN=$SPOOL/bin UUCPDIR=$SPOOL/uucp PATH=/usr/local:/bin:/usr/bin:$NEWSBIN FOO=trash # Only needed to prevent "set" headaches. # # Process TM files. Three cases are known: # 1. Empty file (that is easy!) # 2. Mail file -- send it to the intended # recipient. # 3. Compressed mail -- this is the case # that takes all the code. # # This script assumes that the TM files have been moved into the # directory $UUCPDIR/TM. It is unwise to process the TM # files in /usr/spool/uucp without somehow turning off all the # uucp stuff, and moving the files is easy enough to do. # cd $UUCPDIR/TM for i in TM.?????.??? do filetype=`file $i | sed "s/.*: //"` case "$filetype" in empty) rm -f $i ;; "ascii text") receiver=`sed "s/^To: //p/" $i` case "$receiver" in "") ;; *) mail $receiver < $i esac ;; data) testing=`( /usr/local/lib/news/compress -d < $i 2>&1 ) | sed 1q` case "$testing" in "stdin: not in compressed format"*) testing=`( tar tvf $i 2>&1 ) | sed 1q` case "$testing" in "tar: errno returned 0, Can't read input"*) ;; "directory checksum error"*) ;; *) echo $i may be a tar-format data file. 1>&2 continue esac testing=`( cpio -itv < $i 2>&1 ) | sed 1q` case "$testing" in "Out of phase--get help") echo $i is a data file of undetermined format ;; *) echo $i may be a cpio-format data file. 1>&2 esac continue esac (mkdir $UUCPDIR/$i 2>&1) > /dev/null cd $UUCPDIR/$i /usr/local/lib/news/compress -d < $UUCPDIR/TM/$i | deTM_unbatch $i cd $UUCPDIR/TM ;; *) echo "filetype is $filetype!!" 1>&2 ;; esac done # # Make a directory containing all the news articles, without # duplication. "deTM_unbatch" creates files named after their article ID. 
# "deTM_unbatch" also creates in each of the "TM" directories containing # the news articles, a file called "index" that is a list of all the # article ID's (== filenames) in the TM directory. # cd $UUCPDIR zerodirs=`find $UUCPDIR/*/index -size 0 -print` rm -f `echo $zerodirs` rmdir `echo $zerodirs | sed "s/.index//g"` mkdir all_articles for i in TM.?????.??? do if test ! -d $UUCPDIR/$i then echo Oh, how terrible -- $UUCPDIR/$i not a directory! 1>&2 exit fi cd $UUCPDIR/$i for j in * do case "$j" in index) continue ;; [1-9]*) destination=$UUCPDIR/all_articles/$j if test ! -f "$destination" then ln $j "$destination" else set $FOO `ls -l "$destination"` oldsize=$6 set $FOO `ls -l $UUCPDIR/$i/$j` newsize=$6 if test "$oldsize" -lt "$newsize" then rm -f "$destination" ln $UUCPDIR/$i/$j $destination fi fi ;; *) echo Oooh, what a weird file -- $UUCPDIR/$i/$j 1>&2 esac done done # # Now run "mv2junk", and then "process_junk". Those two programs should # be run with normal news processing disabled, to avoid having two # programs simultaneously trying to update $ACTIVE. # ============================================================ de_TMunbatch.c ============================================================ /* * unbatchnews: extract news in batched format and process it one article * at a time. The format looks like * #! rnews 1234 * article containing 1234 characters * #! rnews 4321 * article containing 4321 characters * * Special purpose processing: Create new files in * ~news/uucp/TM* using the article ID as the filename. */ #ifndef lint static char *SccsId = "@(#)unbatch.c 1.8 9/3/84"; #endif !lint # include char buf[BUFSIZ]; char hdr[BUFSIZ]; static char RNEWS_STR[] = "#! 
rnews "; static char MSG_STR[] = "Message-ID: <"; static char TMPNAME[(sizeof "/usr/spool/news/uucp/") + (sizeof "TM.xxxxx.xxx")] = "/usr/spool/news/uucp/"; main(argc, argv) int argc; char **argv; { register int length, x; register FILE *ndx_pfn, *pfn = NULL; register long size; register char *angle_brack, *idptr; char filename[BUFSIZ]; char *index(), *gets(); long atol(); if ((argc != 2) || (strlen (argv[1]) != 12) || (0 != strncmp ("TM.", argv[1], 3))) { fprintf (stderr, "Argument error, argv[1]=%s.\n", argv[1]); exit (1); } strcat (TMPNAME, argv[1]); sprintf (filename, "%s/index", TMPNAME); if (0 == (ndx_pfn = fopen (filename, "w"))) perror(); gets (buf); while (0 == (x = strncmp (buf, RNEWS_STR, (sizeof RNEWS_STR) - 1))) { size = atol (buf - 1 + (sizeof RNEWS_STR)); if(size <= 0) { fprintf (stderr, "Bad size=%d.\n", size); exit (3); } *hdr = NULL; pfn = NULL; while ((pfn == NULL) && (size > 0) && !feof(stdin)) { fgets (buf, BUFSIZ, stdin); /* Save header info until article ID arrives. */ strcat (hdr, buf); if (0 == strncmp (MSG_STR, buf, (sizeof MSG_STR) - 1)) { idptr = buf + (sizeof MSG_STR) - 1; if (angle_brack = index (idptr, '>')) { *angle_brack = NULL; sprintf(filename,"%s/%s",TMPNAME,idptr); pfn = fopen(filename, "w"); fprintf (ndx_pfn, "%s\n", idptr); fwrite (hdr,length = strlen(hdr),1,pfn); if ( 0 >= (size -= length)) { fprintf (stderr, "Oops, header size too big.\n"); exit (4); } } else { fprintf (stderr, "Oops, bad Message-ID.\n"); exit (5); } } } if (pfn == NULL) /* Article ID was not found... */ exit(); while (fgets (buf, BUFSIZ, stdin) && (0 < size)) { fwrite (buf, length=strlen(buf), 1, pfn); size -= length; } if (size != 0) { fprintf (stderr, "Article size != %d, filename=%s.\n", size, filename); exit (6); } fclose(pfn); } if (x) /* x != 0 implies that the RNEWS strncmp test failed above. 
*/ { fprintf(stderr, "out of sync, skipping %s\n", buf); exit (2); } } ============================================================ mv2junk ============================================================ SPOOL=/usr/spool/news NEWSBIN=$SPOOL/bin LIB=/usr/local/lib/news ACTIVE=$LIB/active JUNK=$SPOOL/junk PATH=/usr/local:/bin:/usr/bin:$NEWSBIN (cd $LIB; tar cf - active ) | (cd /usr/tmp; tar xpvf - ) # Save a $ACTIVE count=`awk '$1 == "junk" {print $2}' $ACTIVE` case "$count" in "") exit esac curdir=`pwd | sed "s/ //"` for artdir in $* do cd $curdir/$artdir oldarts=`ls | sort +n` || { echo Something went wrong during $JUNK cleanup. exit ; } for article in $oldarts do count=`expr $count + 1` if test -f $count then echo ln $article $count fails during $JUNK cleanup. else ln $article $JUNK/$count fi done done modactive junk $count echo diff $LIB/active /usr/tmp/active diff $LIB/active /usr/tmp/active -- Charlie Perkins, IBM T.J. Watson Research philabs!polaris!charliep, perk%YKTVMX.BITNET@berkeley, perk.yktvmx.ibm@csnet-relay