N /****************************************************************************/ /*									    */ ) /*  FACILITY:	Routine Analyzer					    */  /*									    */ ( /*  MODULE:	C Language Parser					    */ /*									    */ G /*  AUTHOR:	Steve Branam, Network Product Support Group, Digital	    */ 6 /*		Equipment Corporation, Littleton, MA, USA.		    */ /*									    */ M /*  DESCRIPTION: This module contains the source parser for C language	    */ K /*  source files. Note that this particular implementation is a very	    */ N /*  rudimentary state-driven parser. While it is reasonably functional, it  */N /*  is possible that it may become confused by unusual but otherwise valid  */$ /*  language constructs.						    */ /*									    */ " /*  REVISION HISTORY:							    */ /*									    */ / /*  V0.1-01 10-FEB-1995 Steve Branam					    */  /*									    */ I /*	Allow spaces between # and keyword in preprocessor directives.      */  /*									    */ / /*  V0.1-00 24-AUG-1994 Steve Branam					    */  /*									    */   /*	Original version.						    */ /*									    */ N /****************************************************************************/   #include <stdio.h> #include <ctype.h> #include "ranalyzer.h" #include "parser.h"   8 #define MAX_DQUOTE_LEN	    512			/* Just a guess.	    */0 #define MAX_SQUOTE_LEN	    4			/* '\123'		    */    ( #define cond_token(t)	(copy ? t : SPACE)   typedef enum { 
     NO_MACRO,      IN_MACRO } c_macro_states;    typedef enum {      FIND_START,      FIND_END_ALNUM,      FIND_END_NUMBER,     FIND_END_SPACE,      FIND_END_DIRECTIVE,      FIND_END_DQUOTED,      FIND_END_SQUOTED,      FIND_END_COMMENT } c_scanner_states;    typedef enum {      FIND_IDENT,      FIND_DEF_LPAREN,     FIND_DEF_RPAREN,     FIND_LBRACE_OR_SEMICOLON,      FIND_LBRACE,     FIND_MACRO_IDENT,      FIND_MACRO_LPAREN,     IN_ROUTINE,      FIND_REF_LPAREN  } c_parser_states;   static char  	*mPSNames[] = { 	    "FIND_IDENT", 	    "FIND_DEF_LPAREN",  	    "FIND_DEF_RPAREN",   	    "FIND_LBRACE_OR_SEMICOLON", 	    "FIND_LBRACE",  	    "FIND_MACRO_IDENT", 	    "FIND_MACRO_LPAREN",  	    "IN_ROUTINE", 	    "FIND_REF_LPAREN" 	};  	  typedef enum {      END_C_SOURCE,      LPAREN,      RPAREN,      LBRACE,      RBRACE,      SEMICOLON,     IDENTIFIER,      KEYWORD,
     MACBEGIN, 
     SPACE,	     OTHER  } c_token_types;   static	char  	*keywords[] = { "auto", 			"break", 
 			"case",
 			"char", 			"continue",
 			"default",  			"do", 			"double",
 			"else", 			"entry", 
 			"enum", 			"extern", 			"float", 	 			"for", 
 			"goto", 			"if",	 			"int", 
 			"long", 			"register", 			"return", 			"sizeof", 			"short",  			"static", 			"struct", 			"switch",
 			"typedef",  			"union",  			"unsigned",
 			"void", 			"while", 
 			NULL };  2 static  int				    /* Statement char count.	    */ 	statement; 0 static  int				    /* Comment char count.	    */	 	comment;   N /*************************************************************************++*/ void trace_parser(K /* Write a parser trace message to the listing file. Listing must be	    */ = /* enabled. Parser tracing is assumed to be enabled.			    */        char    *aTraceStr% 	    /* (READ, BY ADDR):  					    */ 2 	    /* String to write to listing file.				    */  & )	/* No return value.      					    */G 	/*****************************************************************--*/    {      if (list_enabled()) {  	fputs(aTraceStr, list_file());  	restore_list_column();      }  }   N /*************************************************************************++*/ void trace_parser_int(N /* Write a parser trace message containing one integer value to the listing */N /* file. Listing must be enabled. Parser tracing is assumed to be enabled.  */       char    *aTraceStr, % 	    /* (READ, BY ADDR):  					    */ F 	    /* String (including one integer printf format control) to	    */) 	    /* write to listing file.					    */  	          int	    vInt# 	    /* (READ, BY VAL):						    */ 2 	    /* Integer value to write in string.			    */  & )	/* No return value.      					    */G 	/*****************************************************************--*/    {      if (list_enabled()) { ' 	fprintf(list_file(), aTraceStr, vInt);  	restore_list_column();      }  }   N /*************************************************************************++*/ void trace_parser_state(N /* Write a parser state change trace message to the listing file. Listing   */D /* must be enabled. Parser tracing is assumed to be enabled.		    */       char    *aFromName, % 	    /* (READ, BY ADDR):  					    */ 5 	    /* Name string for state changing from.			    */  	          char    *aToName% 	    /* (READ, BY ADDR):  					    */ 3 	    /* Name string for state changing to.			    */  	     & )	/* No return value.      					    */G 	/*****************************************************************--*/    {      if (list_enabled()) { A 	fprintf(list_file(), "\nTRACE: Parser state change: %s to %s\n",  	    aFromName, aToName);  	restore_list_column();      }  }   N /*************************************************************************++*/ void new_source_line( A /* Updates source line counters when a new line is found.		    */        SOURCEFILE 	    *aSourceRecord " 		/* (MODIFY, BY ADDR):					    */: 		/* Source file information record. The line count	    */* 		/* statistics will be updated.				    */    )	/* No return value						    */G 	/*****************************************************************--*/    {      /*>     ** Classify the source line just completed as either mixedI     ** statements/comments, statements only, comments only, or blank, and 8     ** increment the appropriate source record counters.     */          if (statement && comment) { ! 	inc_source_mixed(aSourceRecord);      }      else if (statement){& 	inc_source_statements(aSourceRecord);     }      else if (comment) { $ 	inc_source_comments(aSourceRecord);     } 
     else {! 	inc_source_empty(aSourceRecord);      }      :     statement = 0;			    /* Reset counters for next	    */*     comment   = 0;			    /* line.			    */  !     new_list_line(aSourceRecord);  }   N /*************************************************************************++*/ static int iskeyword( N /* Determines whether or not an alphanumeric token is a source language	    */ /* keyword.								    */        char    *aKeywords[], $ 	    /* (READ, BY ADDR):						    */G 	    /* List of known source language keyword string pointers, in    */ = 	    /* alphabetical order, terminated by NULL entry.		    */        char    *aToken $ 	    /* (READ, BY ADDR):						    */) 	    /* Token string to check.					    */   , )	/* Returns status of comparison:				    */' 	/*	1   - Token is a keyword.				    */ + 	/*	0   - Token is not a keyword.				    */ G 	/*****************************************************************--*/    { 7     int	    cmpstat;			    /* Comparison status.	    */   O     while (*aKeywords != NULL && (cmpstat = strcmp(*aKeywords++, aToken)) < 0);      return !cmpstat; }   N /*************************************************************************++*/ static int evaluate_if( > /* Evaluates a #if conditional compilation directive.			    */ /*									    */ : /* WARNING: This is just a dummy version for now.			    */       char    *aCondition % 	    /* (READ, BY ADDR):  					    */ 2 	    /* Condition string to be evaluated.			    */  C )	/* Returns boolean flag to indicate results of evaluation.	    */ G 	/*****************************************************************--*/    { A     return atoi(aCondition);	/* Handle simple numeric constant */  }   N /*************************************************************************++*/ static int evaluate_ifdef(@ /* Evaluates a #ifdef conditional compilation directive.		    */ /*									    */ : /* WARNING: This is just a dummy version for now.			    */       char    *aCondition % 	    /* (READ, BY ADDR):  					    */ 2 	    /* Condition string to be evaluated.			    */  C )	/* Returns boolean flag to indicate results of evaluation.	    */*G 	/*****************************************************************--*/r   { 1     return 0; /* Nothing is defined right now. */P }e  N /*************************************************************************++*/ static get_token( N /* Source file input scanner. Reads the next lexical token from the source  */9 /* file and accumulates source line statistics.				    */o       FILE    *aSourceFile,r  		/* (READ, BY ADDR):					    */0 		/* Source file containing C language.			    */       SOURCEFILE 	    *aSourceRecord," 		/* (MODIFY, BY ADDR):					    */: 		/* Source file information record. The line count	    */* 		/* statistics will be updated.				    */       char    *aToken ! 		/* (WRITE, BY ADDR):					    */ - 		/* String buffer to receive token.			    */v  B )	/* Returns code indicating which type of token was found:	    */7 	/*     END_C_SOURCE - End of the source file.			    */	0 	/*     LPAREN	    - Left parenthesis.				    */0 	/*     RPAREN	    - Right parenthesis.			    */* 	/*     LBRACE	    - Left brace.				    */+ 	/*     RBRACE	    - Right brace.				    */*+ 	/*     SEMICOLON    - Semicolon.				    */ 9 	/*     IDENTIFIER   - Routine or data identifier		    */r2 	/*     KEYWORD	    - C language keyword.			    */3 	/*     MACBEGIN	    - Beginning of macro.			    */') 	/*     SPACE	    - Whitespace.				    */ 6 	/*     OTHER	    - Some other type of token.			    */G 	/*****************************************************************--*/E   {N2     int	    ch;				    /* Input character.		    */=     int	    quotelen;			    /* Length of quoted token.	    */U4     c_scanner_states			    /* Scanner state.		    */ 	    state = FIND_START;C     char    *nextchar = aToken;		    /* Pointer to next char	    */_& 					    /* position in aToken.	    */A     char    condbuf[256];		    /* Conditional directive buf.   */D8     static int				    /* Conditional compilation copy */& 	    copy = 1;			    /* flag.			    */6     static c_macro_states		    /* Macro state.		    */ 	    macro = NO_MACRO;       do { 	ch = fgetc(aSourceFile);T 	switch (state) {O 	case FIND_START:N 	    list_char(ch);D1 	    if (isalpha(ch) || ch == '_' || ch == '$') {_ 		state = FIND_END_ALNUM;  		*nextchar++ = ch;  		statement++; 	    } 	    else if (isdigit(ch)) { 		state = FIND_END_NUMBER; 		*nextchar++ = ch;  		statement++; 	    } 	    else if (isspace(ch)) { 		if (ch == '\n') {	@ 		    if (macro == IN_MACRO) {/* Special case: if end of line */9 			macro = NO_MACRO;   /* in a macro, this is really   */n 			ungetc(ch, aSourceFile); 9 			return cond_token(RBRACE); /* end of a "routine"    */,$ 		    }			    /* definition.		    */ 		    else {" 			new_source_line(aSourceRecord); 		    }" 		}	 		state = FIND_END_SPACE;  	    } 	    else {" 		switch (ch) {  		case '(':	 		    statement++;  		    return cond_token(LPAREN); 		    break; 		case ')':  		    statement++;  		    return cond_token(RPAREN); 		    break; 		case '{':* 		    statement++;  		    return cond_token(LBRACE); 		    break; 		case '}':g 		    statement++;  		    return cond_token(RBRACE); 		    break; 		case ';':u 		    statement++;# 		    return cond_token(SEMICOLON);  		    break; 		case '#':	 		    statement++; 		    *nextchar++ = ch;f! 		    state = FIND_END_DIRECTIVE;l 		    break; 		case '"':* 		    statement++; 		    state = FIND_END_DQUOTED;* 		    quotelen = 0;  		    break; 		case '\'': 		    statement++; 		    state = FIND_END_SQUOTED;) 		    quotelen = 0;* 		    break; 		case '/':* 		    ch = fgetc(aSourceFile); 		    if (ch == '*') { 			list_char(ch);W 			state = FIND_END_COMMENT; 			comment += 2; 		    }t 		    else { 			ungetc(ch, aSourceFile);e 			statement++;a 		    }s 		    break;
 		default: 		    if (ch != EOF) { 			*nextchar++ = ch; 			*nextchar   = '\0'; 			statement++;l 			return cond_token(OTHER); 		    }) 		}  	    } 	    break;o 	case FIND_END_ALNUM:*1 	    if (isalnum(ch) || ch == '_' || ch == '$') {) 		list_char(ch); 		*nextchar++ = ch;o 		statement++; 	    } 	    else {  		ungetc(ch, aSourceFile); 		*nextchar = '\0';*$ 		if (iskeyword(keywords, aToken)) {! 		    return cond_token(KEYWORD);t 		}l 		else {$ 		    return cond_token(IDENTIFIER); 		}r 	    } 	    break;  	case FIND_END_NUMBER: 	    if (isdigit(ch)) {* 		list_char(ch); 		*nextchar++ = ch;* 		statement++; 	    } 	    else {i 		ungetc(ch, aSourceFile); 		*nextchar = '\0';t 		return cond_token(OTHER);s 	    } 	    break;t 	case FIND_END_SPACE:  	    if (isspace(ch)) {h 		list_char(ch); 		if (ch == '\n') {D@ 		    if (macro == IN_MACRO) {/* Special case: if end of line */9 			macro = NO_MACRO;   /* in a macro, this is really   */D 			ungetc(ch, aSourceFile);aB 			return cond_token(RBRACE);	    /* the end of a "routine"	    */$ 		    }			    /* definition.		    */ 		    else {" 			new_source_line(aSourceRecord); 		    }  		}  	    } 	    else {) 		ungetc(ch, aSourceFile); 		*nextchar = '\0';e 		return cond_token(SPACE);a 	    } 	    break;  	case FIND_END_DIRECTIVE:  	    if (isalpha(ch)) {* 		list_char(ch); 		*nextchar++ = ch;* 		statement++; 	    }6 	    else if (*(nextchar - 1) == '#' && isspace(ch)) {H 	        list_char(ch);         /* Ignore space between # and keyword */ 		statement++; 	    } 	    else {/ 		ungetc(ch, aSourceFile); 		*nextchar = '\0';o# 		if (!strcmp(aToken, "#define")) {p 		    macro = IN_MACRO;N" 		    return cond_token(MACBEGIN); 		}*$ 		else if (!strcmp(aToken, "#if")) {3 		    fgets(condbuf, sizeof(condbuf), aSourceFile);h 		    if (list_enabled()) {a 			fputs(condbuf, list_file());/ 		    } % 		    new_source_line(aSourceRecord);k" 		    copy = evaluate_if(condbuf); 		    return SPACE;u 		}.' 		else if (!strcmp(aToken, "#ifdef")) {e3 		    fgets(condbuf, sizeof(condbuf), aSourceFile);  		    if (list_enabled()) {s 			fputs(condbuf, list_file());  		    } % 		    new_source_line(aSourceRecord);m% 		    copy = evaluate_ifdef(condbuf);{ 		    return SPACE;S 		}R( 		else if (!strcmp(aToken, "#ifndef")) {3 		    fgets(condbuf, sizeof(condbuf), aSourceFile);  		    if (list_enabled()) {/ 			fputs(condbuf, list_file());r 		    } % 		    new_source_line(aSourceRecord);*& 		    copy = !evaluate_ifdef(condbuf); 		    return SPACE;d 		}*& 		else if (!strcmp(aToken, "#else")) { 		    copy = !copy;l 		    return SPACE;y 		}	' 		else if (!strcmp(aToken, "#endif")) {] 		    copy = 1;, 		    return SPACE;/ 		}  		else 		{n 		    return cond_token(OTHER);g 		}t 	    } 	    break;/ 	case FIND_END_DQUOTED:m 	    list_char(ch);	 	    if (ch == '"') {  		statement++; 		return cond_token(OTHER);/ 	    }; 	    else if (ch == '\\') {	    /* Check for escape seq. */f 		statement++;
 		quotelen++; ! 	        ch = fgetc(aSourceFile);/ 		list_char(ch); 		statement++;
 		quotelen++;* 	    } 	    else if (ch == '\n') {*! 		new_source_line(aSourceRecord);  	    }* 	    else if (quotelen > MAX_DQUOTE_LEN) {	 		printf( G 	    "WARNING: Suspected unterminated double quote at line %d of %s\n",)> 		    source_line(aSourceRecord), source_name(aSourceRecord)); 		return cond_token(OTHER);* 	    } 	    else {t 		statement++;
 		quotelen++;t 	    } 	    break;c 	case FIND_END_SQUOTED:  	    list_char(ch);* 	    if (ch == '\'') { 		ch = fgetc(aSourceFile); 		if (ch == '\'') {r 		    statement++; 		    list_char(ch); 		}	 		else { 		    ungetc(ch, aSourceFile); 		}t 		statement++; 		return cond_token(OTHER);i 	    }8 	    else if (ch == '\n' || quotelen > MAX_SQUOTE_LEN) {! 		new_source_line(aSourceRecord);*	 		printf( G 	    "WARNING: Suspected unterminated single quote at line %d of %s\n", > 		    source_line(aSourceRecord), source_name(aSourceRecord)); 		return cond_token(OTHER);i 	    } 	    else {* 		statement++;
 		quotelen++;  	    } 	    break;. 	case FIND_END_COMMENT:* 	    list_char(ch);i 	    if (ch == '*') {f 		ch = fgetc(aSourceFile); 		if (ch == '/') { 		    list_char(ch); 		    state = FIND_START;d 		    comment += 2;a 		}d 		else { 		    ungetc(ch, aSourceFile); 		    comment++; 		}a 	    } 	    else if (ch == '\n') {*! 		new_source_line(aSourceRecord);* 	    } 	    else {  		comment++; 	    } 	    break;e 	}     } while (ch != EOF);
     copy = 1;*     return END_C_SOURCE; }*     N /*************************************************************************++*/ language_element c_parser(M /* Parses C source language statements, looking for routine definition	    */ L /* begin and end, and routine references. Retrieves the next language	    */* /* element in the source file.						    */ /*									    */cN /* Note that this version is a very simple-minded parser, and has several   */M /* limitations.  It is not able to identify function pointer usages as	    */ N /* routine references. It may also be confused by other legal constructs.   */       FILE    *aSourceFile,c  		/* (READ, BY ADDR):					    */@ 		/* Source file containing C language. Must be opened by	    */ 		/* caller.						    */       SOURCEFILE 	    *aSourceRecord,  		/* (READ, BY ADDR):					    */- 		/* Source file information record.			    */	       char    *aElement,! 		/* (WRITE, BY ADDR):					    */ @ 		/* String buffer that will receive the recognized source    */! 		/* language element.					    */        long    *aSourceLine! 		/* (WRITE, BY ADDR):					    */N@ 		/* Buffer that will receive the line number of aElement.    */  F )	/* Returns one of the following values indicating the type of	    */* 	/* element output in aElement:					    */C 	/*      PARSE_ERROR	    - An error was detected in the input    */r 	/*			      stream.				    */t@ 	/*	END_OF_SOURCE	    - The normal end of file was found.	    */B 	/*	ROUTINE_DEF_BEGIN   - The beginning of a routine definition */ 	/*			      was found.			    */ ? 	/*	ROUTINE_DEF_END	    - The end of the current routine	    */ ) 	/*			      definition was found.		    */t> 	/*	ROUTINE_REF	    - A routine reference (call) was found. */G 	/*****************************************************************--*/c   {o8     static c_parser_states		    /* Parser state.		    */ 	    state = FIND_IDENT;4     static int				    /* Nested braces level.	    */ 	    blevel;9     static char				    /* Name of current routine.	    */ & 	    curdefname[MAX_ROUTINE_NAME + 1];<     int	    plevel;			    /* Nested parenthesis level.    */7     c_token_types			    /* Type of source token.	    */	 	    tokentype; J     char    token[MAX_ROUTINE_NAME + 1];    /* Source token buffer.	    */       /*J     ** This function operates as a state machine. The states represent theO     ** various tokens expected next in the token stream, according to C syntax.*O     ** Whenever a routine definition beginning or end, or routine reference, is	H     ** recognized, the parser returns to the caller. However, context isI     ** maintained between calls to the parser via static state variables.;     */       do {: 	tokentype = get_token(aSourceFile, aSourceRecord, token); 	switch (state) {n 	case FIND_IDENT:	# 	    if (tokentype == IDENTIFIER) {m 		strcpy(aElement, token);, 		*aSourceLine = source_line(aSourceRecord);! 		change_pstate(FIND_DEF_LPAREN);o 	    }& 	    else if (tokentype == MACBEGIN) {" 		change_pstate(FIND_MACRO_IDENT); 	    } 	    break;  	case FIND_DEF_LPAREN: 	    if (tokentype == LPAREN) {+! 		change_pstate(FIND_DEF_RPAREN);R 		paren_level_zero();  	    }( 	    else if (tokentype == IDENTIFIER) { 		strcpy(aElement, token);, 		*aSourceLine = source_line(aSourceRecord); 	    }# 	    else if (tokentype != SPACE) {) 		change_pstate(FIND_IDENT); 	    }		 	    break;	 	case FIND_DEF_RPAREN: 	    if (tokentype == RPAREN) {l 		if (plevel) {	 		    paren_level_dec(); 		}c 		else {. 		    change_pstate(FIND_LBRACE_OR_SEMICOLON); 		}) 	    }$ 	    else if (tokentype == LPAREN) { 		paren_level_inc(); 	    } 	    break;t 	case FIND_LBRACE_OR_SEMICOLON:0 	    if (tokentype == LBRACE) {o 		change_pstate(IN_ROUTINE); 		block_level_zero();o 		strcpy(curdefname, token); 		return ROUTINE_DEF_BEGIN; ( 	    }					/* Forward or external	    */? 	    else if (tokentype == SEMICOLON) {	/* declaration.		    */, 		change_pstate(FIND_IDENT); 	    }B 	    else if (tokentype != SPACE) {	/* Parameter declarations.  */ 		change_pstate(FIND_LBRACE);u 	    } 	    break;I 	case FIND_LBRACE:? 	    if (tokentype == LBRACE) {		/* Keep grabbing tokens	    */	8 		change_pstate(IN_ROUTINE);	/* until left brace.	    */ 		block_level_zero();e 		strcpy(curdefname, token); 		return ROUTINE_DEF_BEGIN;o 	    } #if 0 # 	    else if (tokentype != SPACE) {A 		change_pstate(FIND_IDENT); 	    } #endif 	    break;= 	case FIND_MACRO_IDENT:r# 	    if (tokentype == IDENTIFIER) {n 		strcpy(aElement, token);, 		*aSourceLine = source_line(aSourceRecord);# 		change_pstate(FIND_MACRO_LPAREN);r 	    }# 	    else if (tokentype != SPACE) {u 		change_pstate(FIND_IDENT); 	    } 	    break;  	case FIND_MACRO_LPAREN: 	    if (tokentype == LPAREN) {  		change_pstate(IN_ROUTINE); 		strcpy(curdefname, token); 		return ROUTINE_DEF_BEGIN;n 	    }2 	    else {			    /* Cannot tolerate SPACE here */ 		change_pstate(FIND_IDENT); 	    } 	    break;  	case IN_ROUTINE:* 	    if (tokentype == LBRACE) {s 		block_level_inc(); 	    }$ 	    else if (tokentype == RBRACE) { 		if (blevel == 0) { 		    trace_blmsg(BLEND);   		    change_pstate(FIND_IDENT);0 		    *aSourceLine = source_line(aSourceRecord);# 		    strcpy(aElement, curdefname);e 		    return ROUTINE_DEF_END;N 		}  		else { 		    block_level_dec(); 		}l 	    }( 	    else if (tokentype == IDENTIFIER) { 		strcpy(aElement, token);, 		*aSourceLine = source_line(aSourceRecord);! 		change_pstate(FIND_REF_LPAREN);  	    }* 	    else if (tokentype == END_C_SOURCE) {. 		printf("ERROR: Unexpected end of file %s\n"," 		    source_name(aSourceRecord)); 		return PARSE_ERROR;( 	    } 	    break;u 	case FIND_REF_LPAREN: 	    if (tokentype != SPACE) { 		change_pstate(IN_ROUTINE); 		if (tokentype == RBRACE) {+ 					    /* Must be scanner finding end  */(+ 					    /* of macro, calling it rbrace. */e? 		    if (blevel == 0) {	    /* Treat as end of routine.	    */o 			trace_blmsg(lB 		    "\nTRACE: brace level already 0 (assuming end of macro)\n"); 			change_pstate(FIND_IDENT);n- 			*aSourceLine = source_line(aSourceRecord);f  			strcpy(aElement, curdefname); 			return ROUTINE_DEF_END; 		    }s/ 		    else {		    /* Must be end of data	    */y2 			block_level_dec();  /* initializer list.	    */7 			trace_blmsg("(assuming end of data initializer)\n");  		    }u 		}n 	    } 	    if (tokentype == LPAREN) {k 		return ROUTINE_REF;T 	    } 	    break;c 	}(     } while (tokentype != END_C_SOURCE);     change_pstate(FIND_IDENT);     return END_OF_SOURCE;\ } 