 /*  *  *  This file is part ofA  *	MakeIndex - A formatter and format independent index processor   *E  *  Copyright (C) 1989 by Chen & Harrison International Systems, Inc. 2  *  Copyright (C) 1988 by Olivetti Research CenterA  *  Copyright (C) 1987 by Regents of the University of California   *  *  Author:   *	Pehong Chen.  *	Chen & Harrison International Systems, Inc.  *	Palo Alto, California  *	USA5  *	(phc@renoir.berkeley.edu or chen@orc.olivetti.com)   *  *  Contributors: @  *	Please refer to the CONTRIB file that comes with this releaseD  *	for a list of people who have contributed to this and/or previous  *	release(s) of MakeIndex.   *D  *  All rights reserved by the copyright holders.  See the copyrightG  *  notice distributed with this software for a complete description of 4  *  the conditions under which it is made available.  *  */    #include    "mkind.h"  #include    "scanid.h"  7 #define CHECK_LENGTH()	if (i > len_field) goto OVERFLOW   ) int     idx_lc;			       /* line count */ 0 int     idx_tc;			       /* total entry count */4 int     idx_ec;			       /* erroneous entry count */< int     idx_dc;			       /* number of dots printed so far */   static int first_entry = TRUE; static int comp_len; static char key[ARGUMENT_MAX]; static char no[NUMBER_MAX]; 0 extern char *strchr ARGS((const char* s,int c));   NODE_PTR head; NODE_PTR tail;  & static	void	flush_to_eol ARGS((void));! static	int	make_key ARGS((void)); 3 static	void	make_string ARGS((char **ppstr,int n)); E static	int	scan_alpha_lower ARGS((char *no,short *npg,short *count)); E static	int	scan_alpha_upper ARGS((char *no,short *npg,short *count)); @ static	int	scan_arabic ARGS((char *no,short *npg,short *count));" static	int	scan_arg1 ARGS((void));" static	int	scan_arg2 ARGS((void));> static	int	scan_field ARGS((int *n,char field[],int len_field,0 				 int ck_level, int ck_encap,int ck_actual));0 static	int	scan_key ARGS((struct KFIELD *data));H static	int	scan_no ARGS((char *no,short *npg,short *count,short *type));E static	int	scan_roman_lower ARGS((char *no,short *npg,short *count)); E static	int	scan_roman_upper ARGS((char *no,short *npg,short *count)); C static	void	search_quote ARGS((char sort_key[],char actual_key[]));    #if     (OS_BS2000 | OS_MVSXA). char    UPCC[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; #endif   void scan_idx(VOID_ARG) {      char    keyword[ARRAY_MAX];      int     c;     int     i = 0;     int     not_eof = TRUE;      int     arg_count = -1;   1     MESSAGE("Scanning input file %s...", idx_fn); *     idx_lc = idx_tc = idx_ec = idx_dc = 0;!     comp_len = strlen(page_comp);      while (not_eof) {   	switch (c = GET_CHAR(idx_fp)) {
 	case EOF: 	    if (arg_count == 2) { 		idx_lc++;  		if (make_key())  		    IDX_DOT(DOT_MAX);  		arg_count = -1;  	    } else  		not_eof = FALSE; 	    break;   
 	case LFD: 	    idx_lc++; 	    if (arg_count == 2) { 		if (make_key())  		    IDX_DOT(DOT_MAX);  		arg_count = -1; ! 	    } else if (arg_count > -1) { ? 		IDX_ERROR("Missing arguments -- need two (premature LFD).\n",  			  NULL);  		arg_count = -1;  	    }
 	case TAB:
 	case SPC: 	    break;   	 	default:  	    switch (arg_count) { 
 	    case -1:  		i = 0; 		keyword[i++] = (char) c; 		arg_count++; 		idx_tc++;  		break; 	    case 0: 		if (c == idx_aopen) {  		    arg_count++; 		    keyword[i] = NUL; ( 		    if (STREQ(keyword, idx_keyword)) { 			if (!scan_arg1()) { 			    arg_count = -1; 			} 		    } else { 			IDX_SKIPLINE;5 			IDX_ERROR("Unknown index keyword %s.\n", keyword);  		    } 
 		} else { 		    if (i < ARRAY_MAX) 			keyword[i++] = (char) c;  		    else { 			IDX_SKIPLINE;6 			IDX_ERROR2("Index keyword %s too long (max %d).\n", 				   keyword, ARRAY_MAX);  		    }  		}  		break; 	    case 1: 		if (c == idx_aopen) {  		    arg_count++; 		    if (!scan_arg2()) {  			arg_count = -1; 		    } 
 		} else { 		    IDX_SKIPLINE; 
 IDX_ERROR(K "No opening delimiter for second argument (illegal character `%c').\n", c);  		}  		break; 	    case 2: 		IDX_SKIPLINE; 
 IDX_ERROR(K "No closing delimiter for second argument (illegal character `%c').\n", c);  		break; 	    } 	    break;  	}     }         /* fixup the total counts */     idx_tt += idx_tc;      idx_et += idx_ec;   B     DONE(idx_tc - idx_ec, "entries accepted", idx_ec, "rejected");     CLOSE(idx_fp); }    static void  flush_to_eol(VOID_ARG)@ {	/* flush to end-of-line, or end-of-file, whichever is first */
     int a;  ;     while ( ((a = GET_CHAR(idx_fp)) != LFD) && (a != EOF) )      	/* NO-OP */;  }   
 static int make_key(VOID_ARG) {      NODE_PTR ptr;      int     i;  (     /* allocate and initialize a node */   #ifdef DEBUG 	totmem += sizeof(NODE);? 	(void)fprintf(stderr,"make_key(): malloc(%d)\ttotmem = %ld\n",  	    sizeof(NODE),totmem); #endif /* DEBUG */  8     if ((ptr = (NODE_PTR) malloc(sizeof(NODE))) == NULL)) 	FATAL("Not enough core...abort.\n", "");   #     for (i = 0; i < FIELD_MAX; i++)      { < 	ptr->data.sf[i] = "";		/* initialize fields to pointers */;7 	ptr->data.af[i] = "";		/* to constant empty strings */      }      ptr->data.encap = "";      ptr->data.lpg[0] = NUL;      ptr->data.count = 0;     ptr->data.type = EMPTY;        /* process index key */       if (!scan_key(&(ptr->data))) 	return (FALSE);       /* determine group type */2     ptr->data.group = group_type(ptr->data.sf[0]);       /* process page number */      strcpy(ptr->data.lpg, no);K     if (!scan_no(no, ptr->data.npg, &(ptr->data.count), &(ptr->data.type)))  	return (FALSE);       if (first_entry) { 	head = tail = ptr;  	first_entry = FALSE;      } else { 	tail->next = ptr; 	tail = ptr;     }      ptr->data.lc = idx_lc;     ptr->data.fn = idx_fn;     tail->next = NULL;       return (TRUE); }    static void  #if STDC  make_string(char **ppstr, int n) #else = make_string(ppstr,n)			/* allocate n-byte string if *ppstr */ / char	**ppstr;			/* points to an empty string */  int	n; #endif { ?     if ((*ppstr)[0] == NUL)		/* then we have an empty string */      {  	(*ppstr) = (char*)malloc(n);  	if ((*ppstr) == (char*)NULL) - 	    FATAL("Not enough core...abort.\n", "");  	(*ppstr)[0] = NUL;      }  }   
 static int #if STDC scan_key(FIELD_PTR data) #else  scan_key(data) FIELD_PTR data;  #endif { .     int     i = 0;		       /* current level */9     int     n = 0;		       /* index to the key[] array */ !     int     second_round = FALSE; !     int     last = FIELD_MAX - 1;        while (TRUE) { 	if (key[n] == NUL)  	    break;  	if (key[n] == idx_encap)  	{	 	    n++; 1 	    make_string(&(data->encap),strlen(key) + 1); G 	    if (scan_field(&n, data->encap, strlen(key), FALSE, FALSE, FALSE))  		break;	 	    else  		return (FALSE);  	} 	if (key[n] == idx_actual) {	 	    n++;  	    if (i == last)  	    {. 		make_string(&(data->af[i]),strlen(key) + 1);/ 		if (!scan_field(&n, data->af[i], strlen(key),  				FALSE, TRUE, FALSE)) 		    return (FALSE);  	    }	 	    else  	    {. 		make_string(&(data->af[i]),strlen(key) + 1);/ 		if (!scan_field(&n, data->af[i], strlen(key),  				TRUE, TRUE, FALSE))  		    return (FALSE);  	    }	 	} else {  	    /* Next nesting level */  	    if (second_round) { 		i++; 		n++; 	    } 	    if (i == last)  	    {. 		make_string(&(data->sf[i]),strlen(key) + 1);/ 		if (!scan_field(&n, data->sf[i], strlen(key),  				FALSE, TRUE, TRUE))  		    return (FALSE);  	    }	 	    else  	    {. 		make_string(&(data->sf[i]),strlen(key) + 1);/ 		if (!scan_field(&n, data->sf[i], strlen(key),  				TRUE, TRUE, TRUE)) 		    return (FALSE);  	    } 	    second_round = TRUE; 1 	    if (german_sort && strchr(data->sf[i], '"'))  	    {6 		make_string(&(data->af[i]),strlen(data->sf[i]) + 1);) 		search_quote(data->sf[i], data->af[i]);  	    } 	}     }   9     /* check for empty fields which shouldn't be empty */      if (*data->sf[0] == NUL) {
 	NULL_RTN;     } '     for (i = 1; i < FIELD_MAX - 1; i++)  	if ((*data->sf[i] == NUL) && < 	    ((*data->af[i] != NUL) || (*data->sf[i + 1] != NUL))) { 	    NULL_RTN; 	}     /* i == FIELD_MAX-1 */9     if ((*data->sf[i] == NUL) && (*data->af[i] != NUL)) { 
 	NULL_RTN;     }      return (TRUE); }   
 static int #if STDCL scan_field(int *n, char field[], int len_field, int ck_level, int ck_encap,  	int ck_actual)  #else > scan_field(n, field, len_field, ck_level, ck_encap, ck_actual)
 int    *n; char    field[];E int	len_field;	/* length of field[], EXCLUDING space for final NUL */  int     ck_level;  int     ck_encap;  int     ck_actual; #endif {      int     i = 0;/     int     nbsh;		       /* backslash count */   B     if (compress_blanks && ((key[*n] == SPC) || (key[*n] == TAB))) 	++* n;        while (TRUE) {
 	nbsh = 0; 	while (key[*n] == idx_escape) 	{ 	    nbsh++; 	    field[i++] = key[*n]; 	    CHECK_LENGTH();
 	    ++*n; 	}   	if (key[*n] == idx_quote) 	{ 	    if (nbsh % 2 == 0)  		field[i++] = key[++*n]; 	 	    else  		field[i++] = key[*n];  	    CHECK_LENGTH(); 	}1 	else if ((ck_level && (key[*n] == idx_level)) || * 		 (ck_encap && (key[*n] == idx_encap)) ||, 		 (ck_actual && (key[*n] == idx_actual)) || 		 (key[*n] == NUL)) 	{= 	    if ((i > 0) && compress_blanks && (field[i - 1] == SPC))  		field[i - 1] = NUL; 	 	    else  		field[i] = NUL;  	    return (TRUE); 	 	} else {  	    field[i++] = key[*n]; 	    CHECK_LENGTH();1 	    if ((!ck_level) && (key[*n] == idx_level)) { > 		IDX_ERROR2("Extra `%c' at position %d of first argument.\n", 			   idx_level, *n + 1);  		return (FALSE); 8 	    } else if ((!ck_encap) && (key[*n] == idx_encap)) {> 		IDX_ERROR2("Extra `%c' at position %d of first argument.\n", 			   idx_encap, *n + 1);  		return (FALSE); : 	    } else if ((!ck_actual) && (key[*n] == idx_actual)) {> 		IDX_ERROR2("Extra `%c' at position %d of first argument.\n", 			   idx_actual, *n + 1); 		return (FALSE);  	    } 	}+ 	/* check if max field length is reached */  	if (i > len_field)  	{
 	OVERFLOW: 	    if (!ck_encap) { @ 		IDX_ERROR("Encapsulator of page number too long (max. %d).\n", 			  len_field); 	    } else if (ck_actual) {? 		IDX_ERROR("Index sort key too long (max. %d).\n", len_field); 
 	    } else { B 		IDX_ERROR("Text of key entry too long (max. %d).\n", len_field); 	    } 	    return (FALSE); 	} 	++*n;     }  #if    IBM_PC_TURBO @     return (FALSE);		/* not reached, but keeps compiler happy */ #endif }    int  #if STDC group_type(char *str)  #else  group_type(str)  char   *str; #endif {      int     i = 0;  .     while ((str[i] != NUL) && ISDIGIT(str[i])) 	i++;        if (str[i] == NUL) { 	sscanf(str, "%d", &i);  	return (i);      } else if (ISSYMBOL(str[0])) 	return (SYMBOL);      else 	return (ALPHA); }   
 static int #if STDC: scan_no(char no[], short npg[], short *count, short *type) #else  scan_no(no, npg, count, type) 
 char    no[];  short   npg[]; short  *count;
 short  *type;  #endif {      int     i = 1;       if (isdigit(no[0])) {  	*type = ARAB;" 	if (!scan_arabic(no, npg, count)) 	    return (FALSE);B 	/* simple heuristic to determine if a letter is Roman or Alpha */;     } else if (IS_ROMAN_LOWER(no[0]) && (!IS_COMPOSITOR)) {  	*type = ROML;' 	if (!scan_roman_lower(no, npg, count))  	    return (FALSE);B 	/* simple heuristic to determine if a letter is Roman or Alpha */'     } else if (IS_ROMAN_UPPER(no[0]) && 3 	       ((no[0] == ROMAN_I) || (!IS_COMPOSITOR))) {  	*type = ROMU;' 	if (!scan_roman_upper(no, npg, count))  	    return (FALSE);'     } else if (IS_ALPHA_LOWER(no[0])) {  	*type = ALPL;' 	if (!scan_alpha_lower(no, npg, count))  	    return (FALSE);'     } else if (IS_ALPHA_UPPER(no[0])) {  	*type = ALPU;' 	if (!scan_alpha_upper(no, npg, count))  	    return (FALSE);     } else {, 	IDX_ERROR("Illegal page number %s.\n", no); 	return (FALSE);     }      return (TRUE); }     
 static int #if STDC1 scan_arabic(char no[], short npg[], short *count)  #else  scan_arabic(no, npg, count) 
 char    no[];  short   npg[]; short  *count; #endif {      short   i = 0;<     char    str[ARABIC_MAX+1];		/* space for trailing NUL */  E     while ((no[i] != NUL) && (i <= ARABIC_MAX) && (!IS_COMPOSITOR)) {  	if (isdigit(no[i])) { 	    str[i] = no[i];	 	    i++; 	 	} else { D IDX_ERROR2("Illegal Arabic digit: position %d in %s.\n", i + 1, no); 	    return (FALSE); 	}     }      if (i > ARABIC_MAX) { ? 	IDX_ERROR2("Arabic page number %s too big (max %d digits).\n",  		   no, ARABIC_MAX);  	return (FALSE);     }      str[i] = NUL;   -     ENTER(strtoint(str) + page_offset[ARAB]);        if (IS_COMPOSITOR)5 	return (scan_no(&no[i + comp_len], npg, count, &i));      else 	return (TRUE);  }     
 static int #if STDC6 scan_roman_lower(char no[], short npg[], short *count) #else   scan_roman_lower(no, npg, count)
 char    no[];  short   npg[]; short  *count; #endif {      short   i = 0;     int     inp = 0;     int     prev = 0;      int     the_new;  C     while ((no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR)) {  	if ((IS_ROMAN_LOWER(no[i])) && 1 	    ((the_new = ROMAN_LOWER_VAL(no[i])) != 0)) {  	    if (prev == 0)  		prev = the_new;  	    else {  		if (prev < the_new) {  		    prev = the_new - prev; 		    the_new = 0; 		}  		inp += prev; 		prev = the_new;  	    }	 	} else { D IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no); 	    return (FALSE); 	} 	i++;      }      if (i == ROMAN_MAX) { > 	IDX_ERROR2("Roman page number %s too big (max %d digits).\n", 		   no, ROMAN_MAX); 	return (FALSE);     }      inp += prev;  #     ENTER(inp + page_offset[ROML]);        if (IS_COMPOSITOR)5 	return (scan_no(&no[i + comp_len], npg, count, &i));      else 	return (TRUE);  }     
 static int #if STDC6 scan_roman_upper(char no[], short npg[], short *count) #else   scan_roman_upper(no, npg, count)
 char    no[];  short   npg[]; short  *count; #endif {      short   i = 0;     int     inp = 0;     int     prev = 0;      int     the_new;  C     while ((no[i] != NUL) && (i < ROMAN_MAX) && (!IS_COMPOSITOR)) {  	if ((IS_ROMAN_UPPER(no[i])) && 1 	    ((the_new = ROMAN_UPPER_VAL(no[i])) != 0)) {  	    if (prev == 0)  		prev = the_new;  	    else {  		if (prev < the_new) {  		    prev = the_new - prev; 		    the_new = 0; 		}  		inp += prev; 		prev = the_new;  	    }	 	} else { D IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i + 1, no); 	    return (FALSE); 	} 	i++;      }      if (i == ROMAN_MAX) { > 	IDX_ERROR2("Roman page number %s too big (max %d digits).\n", 		   no, ROMAN_MAX); 	return (FALSE);     }      inp += prev;  #     ENTER(inp + page_offset[ROMU]);        if (IS_COMPOSITOR)5 	return (scan_no(&no[i + comp_len], npg, count, &i));      else 	return (TRUE);  }     
 static int #if STDC6 scan_alpha_lower(char no[], short npg[], short *count) #else   scan_alpha_lower(no, npg, count)
 char    no[];  short   npg[]; short  *count; #endif {      short   i;  0     ENTER(ALPHA_VAL(no[0]) + page_offset[ALPL]);  
     i = 1;     if (IS_COMPOSITOR)5 	return (scan_no(&no[comp_len + 1], npg, count, &i));      else 	return (TRUE);  }     
 static int #if STDC6 scan_alpha_upper(char no[], short npg[], short *count) #else   scan_alpha_upper(no, npg, count)
 char    no[];  short   npg[]; short  *count; #endif {      short   i;  0     ENTER(ALPHA_VAL(no[0]) + page_offset[ALPU]);  
     i = 1;     if (IS_COMPOSITOR)5 	return (scan_no(&no[comp_len + 1], npg, count, &i));      else 	return (TRUE);  }     
 static int scan_arg1(VOID_ARG)  {      int     i = 0;0     int     n = 0;		       /* delimiter count */     int     a;       if (compress_blanks)7 	while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));      else 	a = GET_CHAR(idx_fp);  ,     while ((i < ARGUMENT_MAX) && (a != EOF))     { + 	if ((a == idx_quote) || (a == idx_escape)) ) 	{				/* take next character literally */ < 	    key[i++] = (char) a;	/* but preserve quote or escape */ 	    a = GET_CHAR(idx_fp);6 	    key[i++] = (char) a;	/* save literal character */ 	} 	else if (a == idx_aopen) 5 	{		/* opening delimiters within the argument list */  	    key[i++] = (char) a; 	 	    n++;  	} 	else if (a == idx_aclose) 	{( 	    if (n == 0)			/* end of argument */ 	    {+ 		if (compress_blanks && key[i - 1] == SPC)e 		    key[i - 1] = NUL;y 		else 		    key[i] = NUL;i 		return (TRUE); 	    }# 	    else			/* nested delimiters */v 	    { 		key[i++] = (char) a; 		n--; 	    } 	} 	elset 	{ 	    switch (a)f 	    { 	    case LFD: 		idx_lc++;h@ 		IDX_ERROR("Incomplete first argument (premature LFD).\n", ""); 		return (FALSE);  	    case TAB: 	    case SPC:, 		/* compress successive SPC's to one SPC */ 		if (compress_blanks) 		{R@ 		    if ((i > 0) && (key[i - 1] != SPC) && (key[i - 1] != TAB)) 			key[i++] = SPC; 		    break; 		}s
 	    default:  		key[i++] = (char) a; 		break; 	    } 	} 	a = GET_CHAR(idx_fp);     }e  /     flush_to_eol();			/* Skip to end of line */r
     idx_lc++;eC     IDX_ERROR("First argument too long (max %d).\n", ARGUMENT_MAX);.     return (FALSE);e }     
 static int scan_arg2(VOID_ARG)  {f     int     i = 0;     int     a;     int     hit_blank = FALSE;  :     while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));       while (i < NUMBER_MAX) { 	if (a == idx_aclose) {  	    no[i] = NUL;  	    return (TRUE);/ 	} elseo 	    switch (a) {a 	    case LFD: 		idx_lc++;y? IDX_ERROR("Incomplete second argument (premature LFD).\n", "");t 		return (FALSE);_ 	    case TAB: 	    case SPC: 		hit_blank = TRUE;) 		break;
 	    default:E 		if (hit_blank) {/ 		    flush_to_eol();	/* Skip to end of line */_ 		    idx_lc++; E IDX_ERROR("Illegal space within numerals in second argument.\n", "");_ 		    return (FALSE);* 		}o 		no[i++] = (char) a;  		break; 	    } 	a = GET_CHAR(idx_fp);     }t-     flush_to_eol();	/* Skip to end of line */A
     idx_lc++;hB     IDX_ERROR("Second argument too long (max %d).\n", NUMBER_MAX);     return (FALSE);v })     static voida #if STDC0 search_quote(char sort_key[], char actual_key[]) #else," search_quote(sort_key, actual_key) char    sort_key[];( char    actual_key[];  #endif {s2     char   *ptr;		       /* pointer to sort_key */5     char   *sort;		       /* contains sorting text */o     int     char_found = FALSE;i  !     strcpy(actual_key, sort_key);oH     ptr = strchr(sort_key, '"');       /* look for first double quote */     while (ptr != (char*)NULL)     {0 	sort = "";) 	switch (*(ptr + 1))% 	{				/* skip to umlaut or sharp S */o
 	case 'a':
 	case 'A':. 	    sort = isupper(*(ptr + 1)) ? "Ae" : "ae"; 	    break; 
 	case 'o':
 	case 'O':. 	    sort = isupper(*(ptr + 1)) ? "Oe" : "oe"; 	    break;(
 	case 'u':
 	case 'U':. 	    sort = isupper(*(ptr + 1)) ? "Ue" : "ue"; 	    break; 
 	case 's': 	    sort = "ss";  	    break;n	 	default:  	    break;G 	} 	if (sort[0] != NUL) 	{ 	    char_found = TRUE;)3 	    *ptr = sort[0];	       /* write to sort_key */O 	    *(ptr + 1) = sort[1]; 	}7 	ptr = strchr(ptr + 1, '"');    /* next double quote */:     }i5     if (!char_found)		       /* reset actual_key ? */  	actual_key[0] = NUL;)     return;n } 