/*
**++
**  FACILITY:
**      REBUILD_ITEM_FILE
**
**  ABSTRACT:
**      This program does a salvage on the news database.
**
**	The program reads the headers of all locally stored item files, and
**	attempts to reconstruct the file news_root:news.items based on the
**	local files.
**
**  USAGE:
**      Rebuild_Item_File [index-file] [item-files]
**
**	defaults are:
**
**	index-file	NEWS_ROOT:NEWS.ITEMS
**	item-files	NEWS_DEVICE:[000000...]*.ITM;*
**
**  AUTHOR:
**      Geoff Huston
**
**  COPYRIGHT:
**      Copyright, Geoff Huston  1991
**
**  VERSION:
**	V1.0	24/4/91		gh
**	V1.1	6-Aug-1991	efa
**	 - Correct some initial coding mistakes.  Print more information if a
**	   duplicate message-id encountered.  Continue on if duplicate
**	   message-id encountered.  Try to speed up routine by adding default
**	   allocation, allocation, and buffer count parameters.  Fixed parsing
**	   of second parameter where first character was getting dropped.  Fixed
**	   bug where all articles had today's date as creation date in
**	   news.items file created.
**	    6-Aug-1991 V1.1 of REBUILD_ITEM_FILE.C
**	    I finally had a need to use the program and found some errors in it.
**	    The following are fixed in my version:
**		1) A printf statement didn't print out the line count as it was
**		   running correctly.  A missing ", item_count" was the culprit.
**		2) If the item-files parameter 2 on the command line was specified,
**		   the first character was accidentally chopped off.  The file specification
**		   news_device:[*...]*.itm became ews_device:[*...]*.itm.  The problem was an
**		   un-needed ++argv[2] for some reason.
**		3) If an error had occurred such as a duplicate record encountered, the
**		   program immediately failed.  The problem I had was this was the reason I was
**		   rebuilding the item file in the first place!  I now report an error and
**		   continue.  Only the first occurrence of a unique message-id is entered into the
**		   news.items file anyway.
**		4) I also print out more information if a bad record is encountered
**		   letting the user know what happened.  Before you would get:
**			PUT returned VMS error:
**			%RMS-F-DUP, duplicate key detected (DUP not set)
**		   Now you get:
**			Error adding: <comp.mail.misc:3376> <1170@dumbcat.sf.ca.us>
**			PUT returned VMS error:
**			%RMS-F-DUP, duplicate key detected (DUP not set)
**		5) A coding error caused the creation date of every article to be
**		   today's date.  The stat() function returns a '0' for success and the code was
**		   assuming a '1' for success.
**		6) To try to speed up the creation of the news.items file I have added
**		   an allocation of 150, a default allocation of 150, and a global buffer count of
**		   50 to the item file when opened.
**		7) I have also included a command procedure to delete the duplicate
**		   items from the news_device:[*...] directory structure.  The input is the name
**		   of the logfile from the rebuild_item_file run.
**	     -Earle Ake
**	     Internet: ake@dayton.saic.com
**	     SPAN 28284::ake
**--
**/

#module RBLDITMFILE "V1.1"

#define _REBUILD_ITEM_FILE_C

#include <ctype.h>
#include <descrip.h>
#include <rms.h>
#include <stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "newsdefine.h"

/* Size of the line buffer used by scan_files() when reading article headers */
#define IOSIZE 512

/*
 * RMS control blocks.  grpfab/grprab are initialised by open_group_file()
 * for GRP_FILENAME; itmfab/itmrab are initialised by open_index_file() for
 * the item index file that is being rebuilt.
 */
struct FAB grpfab, itmfab;
struct RAB grprab, itmrab;
/* Key definitions chained onto itmfab: xabkey_1 is key 0, xabkey_2 is key 1 */
struct XABKEY xabkey_1, xabkey_2;
/* Protection block chained after the key definitions */
struct XABPRO xabpro_1;

/* Record buffers: newsgrp is the buffer of grprab, newsitm the buffer of itmrab */
GRP newsgrp;
ITM newsitm;

/* File handling and the main scan */
void open_group_file();
void open_index_file(char *);
void scan_files(char *);
void fill_newsitm(char *);
/* String helpers */
char *util_undir(char *);
void util_cvrt(char *, char *);
void strip(char *, int);
void util_idcpy(char *, char *);
void util_fromcpy(char *, char *);
void util_subjcpy(char *, char *);
/* Date conversion helpers */
int cvt_date(int *);
int cvt_date_val(char *);
int gmt_to_local(int);
int local_to_gmt(int);
int parse_usenet_date(char *);
int substrcmp(char *, char *);
void strip_compress_lower(char *);
/* Cleanup */
void close_index_file();
void close_group_file();


/*
 *  main
 *
 *  Usage: Rebuild_Item_File [index-file] [item-files]
 *
 *  Both arguments are optional; when omitted the index file defaults to
 *  ITM_FILENAME and the item-file search specification to Itm_files.
 *  Opens the group file and the index file, scans the item files, then
 *  closes both files again.
 */
main(argc, argv)
  int argc;
  char *argv[];
{
  char *item_index = ITM_FILENAME;
  char *itemfiles = Itm_files;

  if (argc > 3) {
    printf("Usage: Rebuild_Item_File [index-file] [item-files]\n");
    exit(1);
    }

  /* First argument overrides the index file, second the item-file spec */
  if (argc != 1) {
    item_index = argv[1];
    if (argc != 2) itemfiles = argv[2];
    }

  open_group_file();
  open_index_file(item_index);
  scan_files(itemfiles);
  close_index_file();
  close_group_file();
}

/*
 *  open_group_file
 *
 *  Open the newsgroup file GRP_FILENAME for read (GET) access and connect
 *  the record stream grprab to it, with newsgrp as both record and user
 *  buffer.  If either the open or the connect returns a status with the
 *  low bit clear, a message is printed and the program exits with that
 *  status.
 */
void open_group_file()
{
  int status;

  /* File access block: GET access only, all sharing options allowed */
  grpfab = cc$rms_fab;
  grpfab.fab$l_fna = GRP_FILENAME;
  grpfab.fab$b_fns = strlen(grpfab.fab$l_fna);
  grpfab.fab$b_fac = FAB$M_GET ;
  grpfab.fab$b_shr = FAB$M_SHRDEL | FAB$M_SHRGET | FAB$M_SHRPUT | FAB$M_SHRUPD;

  /* Record access block: every record is transferred through newsgrp */
  grprab = cc$rms_rab;
  grprab.rab$l_fab = &grpfab;
  grprab.rab$l_ubf = (char *) &newsgrp;
  grprab.rab$l_rbf = grprab.rab$l_ubf;
  grprab.rab$w_usz = sizeof newsgrp;
  grprab.rab$w_rsz = grprab.rab$w_usz;

  status = sys$open(&grpfab);
  if ((status & 1) == 0) {
    printf("VMS error - cannot open news_root:news.groups\n");
    exit(status);
    }

  status = sys$connect(&grprab);
  if ((status & 1) == 0) {
    printf("VMS error - cannot connect to news_root:news.groups\n");
    exit(status);
    }
}

/*
 *  open_index_file
 *
 *  Create (FAB$M_CIF option) the indexed item file named by file_name and
 *  connect the record stream itmrab to it, with newsitm as the record
 *  buffer.  Two keys are defined:
 *
 *    key 0 - 8 byte binary key starting at newsitm.itm_num
 *    key 1 - string key of two segments: itm_id (IDLEN bytes) followed by
 *            itm_grp (4 bytes)
 *
 *  If the create or the connect returns a status with the low bit clear, a
 *  message naming the file is printed and the program exits with that status.
 *
 *  NOTE(review): the change log at the top of the file mentions an
 *  allocation of 150; the values actually requested here are 50000.
 */
void open_index_file(file_name)
  char *file_name;
{
  int status;

  /* Protection block - the last link of the XAB chain.
     NOTE(review): the meaning of the 0xEE00 mask is not visible in this
     file - confirm against the RMS XABPRO documentation. */
  xabpro_1 = cc$rms_xabpro;
  xabpro_1.xab$w_pro = 0xEE00;

  /* Key 1: message id string plus group number, no key option flags */
  xabkey_2 = cc$rms_xabkey;
  xabkey_2.xab$b_ref = 1;
  xabkey_2.xab$b_dtp = XAB$C_STG;
  xabkey_2.xab$b_flg = 0;
  xabkey_2.xab$w_pos0 = (char *) &newsitm.itm_id - (char *) &newsitm;
  xabkey_2.xab$b_siz0 = IDLEN;
  xabkey_2.xab$w_pos1 = (char *) &newsitm.itm_grp - (char *) &newsitm;
  xabkey_2.xab$b_siz1 = 4;
  xabkey_2.xab$l_nxt = (char *) &xabpro_1;

  /* Key 0: 8 byte binary key at itm_num, no key option flags */
  xabkey_1 = cc$rms_xabkey;
  xabkey_1.xab$b_ref = 0;
  xabkey_1.xab$b_dtp = XAB$C_BN8;
  xabkey_1.xab$b_flg = 0;
  xabkey_1.xab$w_pos0 = (char *) &newsitm.itm_num - (char *) &newsitm;
  xabkey_1.xab$b_siz0 = 8;
  xabkey_1.xab$l_nxt = (char *) &xabkey_2;

  /* File access block: indexed file of fixed length newsitm records */
  itmfab = cc$rms_fab;
  itmfab.fab$l_fna = file_name;
  itmfab.fab$b_fns = strlen(itmfab.fab$l_fna);
  itmfab.fab$b_org = FAB$C_IDX;
  itmfab.fab$b_rfm = FAB$C_FIX;
  itmfab.fab$b_rat = FAB$M_CR;
  itmfab.fab$w_mrs = sizeof newsitm;
  itmfab.fab$b_bks = 3;			/* bucket size			*/
  itmfab.fab$l_alq = 50000;		/* allocation quantity		*/
  itmfab.fab$w_deq = 50000;		/* default allocation quantity	*/
  itmfab.fab$w_gbc = 50;		/* global buffer count		*/
  itmfab.fab$l_fop = FAB$M_CIF;
  itmfab.fab$b_fac = FAB$M_PUT ;	/* file access			*/
  itmfab.fab$b_shr = FAB$M_SHRDEL | FAB$M_SHRGET | FAB$M_SHRPUT | FAB$M_SHRUPD;
  itmfab.fab$l_xab = (char *) &xabkey_1;

  /* Record access block: key 0 lookups, records transferred through newsitm */
  itmrab = cc$rms_rab;
  itmrab.rab$l_fab = &itmfab;
  itmrab.rab$b_krf = 0;
  itmrab.rab$b_ksz = 8;
  itmrab.rab$l_ubf = (char *) &newsitm;
  itmrab.rab$l_rbf = itmrab.rab$l_ubf;
  itmrab.rab$w_usz = sizeof newsitm;
  itmrab.rab$w_rsz = itmrab.rab$w_usz;

  status = sys$create(&itmfab);
  if ((status & 1) == 0) {
    printf("VMS error - cannot create %s\n",file_name);
    exit(status);
    }

  status = sys$connect(&itmrab);
  if ((status & 1) == 0) {
    printf("VMS error - cannot connect to %s\n",file_name);
    exit(status);
    }
}

void scan_files(itm_files)
  char *itm_files;
{
  int context = 0, status, version, no_grp, item_count = 0;
  unsigned int itm_key[2];
  char fnam[257], grp[257], lstgrp[257], inline[IOSIZE], *s, *sf, *p;
  FILE *fpr;
  struct dsc$descriptor itm_dsc;
  struct stat sbuffer;
  $DESCRIPTOR(fnam_dsc,fnam);

  itm_dsc.dsc$w_length = strlen(itm_files);
  itm_dsc.dsc$b_dtype = DSC$K_DTYPE_T;
  itm_dsc.dsc$b_class = DSC$K_CLASS_S;
  itm_dsc.dsc$a_pointer = itm_files;

  grprab.rab$l_kbf = grp;
  grprab.rab$b_ksz = SUBJLEN;
  grprab.rab$b_krf = 0;
  grprab.rab$l_rop = RAB$M_RRL | RAB$M_NLK ;
  grprab.rab$b_rac = RAB$C_KEY;

  itmrab.rab$l_kbf = (char *) itm_key;
  itmrab.rab$b_krf = 0;
  itmrab.rab$b_ksz = 8;
  itmrab.rab$l_rop = RAB$M_RRL | RAB$M_NLK ;
  itmrab.rab$b_rac = RAB$C_KEY;

  printf("Scanning Item files: %s:\n",itm_files);

  *lstgrp = '\0';
  while ((status = lib$find_file(&itm_dsc,&fnam_dsc,&context,0,0,0,0)) & 1) {
    fnam[256] = '\0';

    if (!(++item_count % 100)) printf("%d\r",item_count);
     
    if (s = strchr(fnam,' ')) *s = '\0';
    if (!(sf = strrchr(fnam,':'))) continue;
    if (!(sf = strrchr(fnam,'['))) continue;
    ++sf;
    if (!(p = strchr(sf,']'))) continue;
    *p = ' ';
    if (*sf == '0') {
      if (!(sf = strchr(sf,'.'))) continue;
      sf++;
      }
    if (sscanf(sf,"%s %d.ITM;%d",grp,&itm_key[0],&version) != 3) {
      *p = ']';
      printf("\tUnrecognized news item file - ?%s?\n",fnam);
      continue;
      }
    *p = ']';
    strcpy(grp,util_undir(grp));
    util_cvrt(grp,grp);

    if (strcmp(grp,lstgrp)) {
      util_cvrt(lstgrp,grp);
      if (!(sys$get(&grprab) & 1)) {
        printf("\tCannot match directory to groupname - %s\n",lstgrp);
        no_grp = 1;
        }
      else {
        no_grp = 0;
        itm_key[1] = newsgrp.grp_num;
        }
      }
    if (no_grp) continue;

    newsitm.itm_num = itm_key[0];
    newsitm.itm_grp = itm_key[1];
    sprintf(inline,"<%d:%d>",newsgrp.grp_num,newsitm.itm_num);
    util_idcpy(newsitm.itm_id,inline);
    time(&newsitm.itm_recvdate);
    newsitm.itm_postdate = newsitm.itm_recvdate;
    newsitm.itm_lines = 0;
    newsitm.itm_title[0] = '\0';
    newsitm.itm_from[0] = '\0';
    newsitm.itm_cachedate = newsitm.itm_life = newsitm.itm_flags = newsitm.itm_cid = 0;

    if (!(sys$get(&itmrab) & 1)) {
      fpr = fopen(fnam,"r");
      while (fgets(inline,IOSIZE,fpr)) {
        if (*inline == '\n') break;
	fill_newsitm(inline);
	}
      fclose(fpr);

      if (stat(fnam,&sbuffer) == 0) newsitm.itm_recvdate = sbuffer.st_ctime;
      if (!((status = sys$put(&itmrab)) & 1)) {
        printf("\nError adding: ");
        printf("<%s:%d> ",grp,newsitm.itm_num);
        printf("%s\n", newsitm.itm_id);
        printf("\tPUT returned VMS error:\n");
        if ((status & 0x000fffff) == 0x000184ec)
          printf("%%RMS-F-DUP, duplicate key detected (DUP not set)\n");
        else
          lib$signal(status);
        }
      }
    }
  lib$find_file_end(&context);
  printf("\n\nAdded %d news articles.\n", item_count);
}

/*
 *  fill_newsitm
 *
 *  Examine one article header line and copy the fields of interest into
 *  the global newsitm record:
 *
 *    From:        -> itm_from
 *    Subject:     -> itm_title
 *    Date:        -> itm_postdate (only when the date can be parsed)
 *    Lines:       -> itm_lines, and NEWS_M_LINESVALID is set in itm_flags
 *    Message-ID:  -> itm_id
 *
 *  A trailing newline in header_line is removed in place.  Header names
 *  are matched case-sensitively.  Any other line is ignored.
 */
void fill_newsitm(header_line)
  char *header_line;
{
  char *cp;

  if (cp = strchr(header_line,'\n')) *cp = '\0';

  /* In every branch the value starts directly after the colon; starting
     one character later could step past the terminator of a line that
     ends at the colon, and lost the first character of "From:x". */
  if (!strncmp(header_line,"From:",5)) {
    cp = &header_line[5];
    while (isspace((unsigned char) *cp)) ++cp;
    util_fromcpy(newsitm.itm_from,cp);
    }
  else if (!strncmp(header_line,"Subject:",8)) {
    cp = &header_line[8];
    while (isspace((unsigned char) *cp)) ++cp;
    util_subjcpy(newsitm.itm_title,cp);
    }
  else if (!strncmp(header_line,"Date:",5)) {
    int tmp;
    cp = &header_line[5];
    while (isspace((unsigned char) *cp)) ++cp;
    /* Keep the default post date when the header cannot be parsed */
    if (tmp = parse_usenet_date(cp)) newsitm.itm_postdate = tmp;
    }
  else if (!strncmp(header_line,"Lines:",6)) {
    if (sscanf(header_line,"Lines: %d",&newsitm.itm_lines) == 1)
      newsitm.itm_flags |= NEWS_M_LINESVALID;
    }
  else if (!strncmp(header_line,"Message-ID:",11)) {
    cp = &header_line[11];
    while (isspace((unsigned char) *cp)) ++cp;
    util_idcpy(newsitm.itm_id,cp);
    }
}

/*
 *  util_undir
 *
 *  Convert a directory name to a newsgroup string.
 *
 *  An underscore introduces an escaped character: the character following
 *  it is mapped back into one of four punctuation ranges (starting at ':',
 *  '!', '[' and '{').  Every other character is copied in lower case.
 *
 *  The result is held in a static buffer of SUBJLEN bytes which is
 *  overwritten by the next call; output is truncated to fit that buffer.
 *  An underscore that is the last character of the input is dropped.
 */

char *util_undir(input)
  char *input;
{
  static char undir_result[SUBJLEN];
  char *p = undir_result,
       *end = undir_result + SUBJLEN - 1,	/* room for the terminator */
       *in = input;

  while (*in && (p < end)) {
    if (*in == '_') {
      in++;
      /* Do not decode, or step past, the terminator after a trailing '_' */
      if (!*in) break;
      if (*in < 'A') *p++ = (*in - '0') + ':';
      else if (*in < 'P') *p++ = (*in - 'A') + '!';
      else if (*in < 'V') *p++ = (*in - 'P') + '[';
      else *p++ = (*in - 'V') + '{';
      }
    else *p++ = tolower((unsigned char) *in);
    in++;
    }
  *p = '\0';
  return(undir_result);
}

/*
 *  util_cvrt
 *
 *  Convert a string into standard newsgroup format:
 *    - leading and trailing blanks are removed,
 *    - the text is limited to SUBJLEN - 1 characters,
 *    - printable characters are lower-cased, anything else becomes '_',
 *    - the remainder of the SUBJLEN byte result is filled with nulls.
 *
 *  result must have room for SUBJLEN bytes.  result and input may be the
 *  same buffer (callers in this file do that), which is why the text is
 *  moved with memmove() rather than strncpy().
 */

void util_cvrt(result, input)
    char *result,
         *input;
{
  char *in = input;
  int len, i;

  while (*in == ' ') in++;

  len = strlen(in);
  if (len > SUBJLEN - 1) len = SUBJLEN - 1;
  memmove(result,in,len);

  /* Drop trailing blanks */
  while ((len > 0) && (result[len - 1] == ' ')) len--;

  for (i = 0; i < len; i++) {
    unsigned char c = (unsigned char) result[i];

    result[i] = isgraph(c) ? tolower(c) : '_';
    }

  /* Null padding up to SUBJLEN */
  memset(result + len,'\0',SUBJLEN - len);
}

/*
 *  strip
 *
 *  Remove trailing blanks from the first n characters of p and terminate
 *  the string there.  p must have room for n + 1 bytes.  A string made up
 *  entirely of blanks becomes empty; a negative n is treated as 0.
 */

void strip(p,n)
  char *p;
  int n;
{
  if (n < 0) n = 0;
  /* Test index n - 1 down to 0 so that a leading blank is removed too */
  while ((n > 0) && (p[n - 1] == ' ')) n--;
  p[n] = '\0';
}

/*
 *  util_idcpy
 *
 *  Fill the IDLEN byte field result from the string input: characters are
 *  copied until input ends, the rest of the field is set to nulls, and
 *  the last byte of the field is always a null.
 */

void util_idcpy(result,input)
  char *result,
       *input;
{
  char *src = input;
  int n;

  for (n = 0; n < IDLEN; n++) {
    result[n] = *src;
    if (*src) src++;		/* stay on the terminator once reached */
    }
  result[n - 1] = '\0';
}
/*
 *  util_fromcpy
 *
 *  Fill the FROMLEN byte field result from the string input: characters
 *  are copied until input ends, the rest of the field is set to nulls,
 *  and the last byte of the field is always a null.
 */

void util_fromcpy(result,input)
  char *result,
       *input;
{
  char *src = input;
  int n;

  for (n = 0; n < FROMLEN; n++) {
    result[n] = *src;
    if (*src) src++;		/* stay on the terminator once reached */
    }
  result[n - 1] = '\0';
}

/*
 *  util_subjcpy
 *
 *  Fill the SUBJLEN byte field result from the string input: characters
 *  are copied until input ends, the rest of the field is set to nulls,
 *  and the last byte of the field is always a null.
 */

void util_subjcpy(result,input)
  char *result,
       *input;
{
  char *src = input;
  int n;

  for (n = 0; n < SUBJLEN; n++) {
    result[n] = *src;
    if (*src) src++;		/* stay on the terminator once reached */
    }
  result[n - 1] = '\0';
}

/*
 *  cvt_date
 *
 *  Convert from VMS date format to unix integer date value
 *
 *  vdate points to a two longword (quadword) VMS time.  The array is used
 *  as work space and is overwritten by this routine.  Returns the quotient
 *  produced by lib$ediv as the unix date value.
 */

int cvt_date(vdate)
  int *vdate;
{
  /* offset: quadword subtracted from the VMS time - presumably the VMS
     time of 1-Jan-1970 00:00 (TODO confirm).
     divisor: 10,000,000 - presumably the number of VMS clock ticks per
     second (TODO confirm). */
  int offset[2] = {0X4BEB4000, 0X007C9567},
      adjtim[2],
      divisor = 10000000,
      udate;

  /* High longword negative: presumably a VMS delta time.  It is replaced
     by the result of subtracting it from the current system time. */
  if (vdate[1] < 0) {
    int now[2],
        then[2],
        len = 2;

    sys$gettim(now);
    lib$subx(now,vdate,then, &len);
    *vdate = *then;
    *(vdate + 1) = *(then + 1);
    }
  /* NOTE(review): c$ac is not defined in this file; it presumably passes
     the constant 2 (the length in longwords) by reference - confirm. */
  lib$subx(vdate, offset, adjtim, c$ac(2));
  /* adjtim / divisor -> udate; vdate is passed as the last argument and
     is overwritten (presumably with the remainder) */
  lib$ediv(&divisor, adjtim, &udate, vdate);
  return(udate);
}

/*
 *  cvt_date_val
 *
 *  Convert a date string to a unix integer date value.
 *
 *  The keywords TODAY, YESTERDAY and TOMORROW (any case) give the current
 *  time minus the local time of day elapsed so far, shifted by DAY_SECS
 *  for YESTERDAY and TOMORROW.  Any other string is handed to sys$bintim
 *  and the resulting VMS time is converted by cvt_date().
 *
 *  Returns 0 when str is null or empty, or when sys$bintim rejects it.
 *  Input longer than the local 132 byte buffer is truncated.
 */
int cvt_date_val(str)
    char *str;
{
  char locstr[132],
       *l = locstr,
       *p = str;
  time_t now;
  struct tm *stm;
  int vdate[2];
  $DESCRIPTOR(locstr_dsc,locstr);

  if (!str || !*str) return(0);

  /* Bounded upper-case copy; the previous do/while copied without a
     length check and looked past the terminator of an empty string */
  while (*p && (l < locstr + sizeof locstr - 1))
    *l++ = toupper((unsigned char) *p++);
  *l = '\0';

  if (!strcmp(locstr,"TODAY")) {
    time(&now);
    stm = localtime(&now);
    return(now - (stm->tm_sec + (stm->tm_min * 60) + (stm->tm_hour * 3600)));
    }
  if (!strcmp(locstr,"YESTERDAY")) {
    time(&now);
    stm = localtime(&now);
    return(now - (stm->tm_sec + (stm->tm_min * 60) + (stm->tm_hour * 3600) + DAY_SECS));
    }
  if (!strcmp(locstr,"TOMORROW")) {
    time(&now);
    stm = localtime(&now);
    return(now - (stm->tm_sec + (stm->tm_min * 60) + (stm->tm_hour * 3600)) + DAY_SECS);
    }

  /* The descriptor was built for the whole buffer; set the real length */
  locstr_dsc.dsc$w_length = strlen(locstr);
  if (!(sys$bintim(&locstr_dsc,vdate) & 1)) return(0);
  else return(cvt_date(vdate));
}

/*
 *  parse_usenet_date
 *
 *  Convert a Unix date string to a time value.
 */

/*
 * Offset in seconds added to a GMT time to obtain local time, and a flag
 * recording that NEWS_GMT_OFFSET has already been examined.
 */
static int gmt_offset_set = 0,
    gmt_offset = 0;

/*
 *  gmt_to_local
 *
 *  Return filt_date plus the local offset from GMT.
 *
 *  The offset is read once from NEWS_GMT_OFFSET (via getenv), in the form
 *  [+|-]hh[:mm[:ss]].  It stays 0 when the name is not defined or the
 *  value cannot be parsed.
 */
int gmt_to_local(filt_date)
  int filt_date;
{
  char *cp,
       news_gmt_offset[256];
  int sign = 1,
      hr,mn,se;

  if (!gmt_offset_set) {
    /* Mark as done in every case; previously the flag was only set when
       the name was NOT defined, so a defined value was re-parsed on
       every call */
    gmt_offset_set = 1;
    if (cp = getenv("NEWS_GMT_OFFSET")) {
      /* Bounded copy into the local work buffer */
      strncpy(news_gmt_offset,cp,sizeof news_gmt_offset - 1);
      news_gmt_offset[sizeof news_gmt_offset - 1] = '\0';
      strip_compress_lower(news_gmt_offset);
      cp = news_gmt_offset;
      if (*cp == '-') {
        sign = -1;
        ++cp;
        }
      else if (*cp == '+') ++cp;
      hr = mn = se = 0;
      /* Minutes and seconds are optional */
      if (sscanf(cp,"%d:%d:%d",&hr,&mn,&se) > 0) {
        gmt_offset = (60 * 60 * hr) + (60 * mn) + se;
        gmt_offset *= sign;
        }
      }
    }
  return(filt_date + gmt_offset);
}

/*
 *  local_to_gmt
 *
 *  Return local_date minus the local offset from GMT.
 */
int local_to_gmt(local_date)
  int local_date;
{
  /* Let gmt_to_local() examine the offset if the flag is not yet set */
  if (gmt_offset_set == 0) gmt_to_local(0);

  return(local_date - gmt_offset);
}

/*
 *  parse_usenet_date
 *
 *  Convert the text of a Date: header to a unix integer date value.
 *
 *  Two layouts are recognised, each optionally preceded by "weekday,":
 *
 *    dd Mon yy[yy] hh:mm:ss ...      (text starts with a digit)
 *    Www Mon dd hh:mm:ss yyyy        (text starts with a weekday name)
 *
 *  Two digit years 70-99 are taken as 19yy and 00-69 as 20yy.  Dates
 *  outside 1989 - 2037 or with out of range fields give 0.  When the text
 *  contains "GMT" the value is converted to local time by gmt_to_local().
 *
 *  Returns 0 when the date cannot be parsed.
 */
int parse_usenet_date(s)
  char *s;
{
  /* pdate holds "dd-MONTH-yyyy hh:mm:ss": at most 27 characters once the
     fields have passed the range checks below */
  char *cp, mon[80], pdate[32];
  int dom = 0, yr = 0, hr = 0, mn = 0, sc = 0, cvttime;

  if (!s || !*s) return(0);
  if (cp = strchr(s,',')) s = ++cp;
  while (isspace((unsigned char) *s)) s++;
  *mon = '\0';
  if (isdigit((unsigned char) *s)) {
    /* Day, month and year are all required in this layout */
    if (sscanf(s,"%d %79s %d %d:%d:%d",&dom,mon,&yr,&hr,&mn,&sc) < 3) return(0);
    if (yr < 0) return(0);
    /* Expand two digit years only; four digit years are used as given */
    if (yr < 70) yr += 2000;
    else if (yr < 100) yr += 1900;
    }
  else sscanf(s,"%*s %79s %d %d:%d:%d %d",mon,&dom,&hr,&mn,&sc,&yr);

  if (!dom || !yr || !*(cp = mon)) return(0);
  if ((dom <= 0) || (dom >= 32)) return(0);
  /* Upper limit keeps the result within a signed 32 bit count of seconds */
  if ((yr < 1989) || (yr > 2037)) return(0);
  if (strlen(mon) > 10) return(0);
  if ((hr < 0) || (hr > 23)) return(0);
  if ((mn < 0) || (mn > 59)) return(0);
  if ((sc < 0) || (sc > 59)) return(0);

  while (*cp) { *cp = toupper((unsigned char) *cp); ++cp; }
  sprintf(pdate,"%d-%s-%d %d:%d:%d",dom,mon,yr,hr,mn,sc);
  if ((cvttime = cvt_date_val(pdate)) && substrcmp(s,"GMT"))
    return(gmt_to_local(cvttime));
  return(cvttime);
}
/*
 *  substrcmp
 *
 *  Search for the string sub inside the string s.
 *
 *  Returns the 1-based position of the first occurrence of sub in s, or
 *  0 when sub does not occur in s.  An empty sub is found at position 1.
 */

int substrcmp(s,sub)
  char *s,
       *sub;
{
  int sub_len = strlen(sub);
  int s_len = strlen(s);
  int last_start = s_len - sub_len;	/* last offset where sub still fits */
  int pos = 0;

  while (pos <= last_start) {
    if (strncmp(s + pos,sub,sub_len) == 0) return(pos + 1);
    pos++;
    }
  return(0);
}

/*
 *  strip_compress_lower
 *
 *  Rewrite the string s in place: leading and trailing white space is
 *  removed, every run of white space (or other non-printing characters)
 *  becomes a single space, and all letters are converted to lower case.
 */

void strip_compress_lower(s)
  char *s;
{
  char *start = s,
       *out = s;
  unsigned char c;

  for ( ; *s; s++) {
    /* The ctype routines need an unsigned char value; a plain char may
       be negative for 8 bit characters */
    c = (unsigned char) *s;
    if (isgraph(c)) *out++ = tolower(c);
    /* Emit one space per run, and none at the very start */
    else if ((out > start) && (out[-1] != ' ')) *out++ = ' ';
    }
  /* Drop the single space a trailing run may have left behind */
  if ((out > start) && (out[-1] == ' ')) --out;
  *out = '\0';
}

/*
 *  close_index_file
 *
 *  Close the item index file.  A failing close is reported, since the
 *  file has just been written, but does not stop the program so that the
 *  remaining cleanup still runs.
 */
void close_index_file()
{
  int status;

  if (!((status = sys$close(&itmfab)) & 1))
    printf("VMS error - cannot close item index file (status %%X%08X)\n",status);
}

/*
 *  close_group_file
 *
 *  Close the newsgroup file opened by open_group_file().  The status of
 *  the close is not examined.
 */
void close_group_file()
{
  (void) sys$close(&grpfab);
}
