Annotation of ircnowd/src/portab/ansi2knr.c, Revision 1.1.1.1
1.1 tomglok 1: /* Copyright (C) 1989, 2000 Aladdin Enterprises. All rights reserved. */
2:
3: /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
4:
5: /*
6: ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
7: WARRANTY. No author or distributor accepts responsibility to anyone for the
8: consequences of using it or for whether it serves any particular purpose or
9: works at all, unless he says so in writing. Refer to the GNU General Public
10: License (the "GPL") for full details.
11:
12: Everyone is granted permission to copy, modify and redistribute ansi2knr,
13: but only under the conditions described in the GPL. A copy of this license
14: is supposed to have been given to you along with ansi2knr so you can know
15: your rights and responsibilities. It should be in a file named COPYLEFT,
16: or, if there is no file named COPYLEFT, a file named COPYING. Among other
17: things, the copyright notice and this notice must be preserved on all
18: copies.
19:
20: We explicitly state here what we believe is already implied by the GPL: if
21: the ansi2knr program is distributed as a separate set of sources and a
22: separate executable file which are aggregated on a storage medium together
23: with another program, this in itself does not bring the other program under
24: the GPL, nor does the mere fact that such a program or the procedures for
25: constructing it invoke the ansi2knr executable bring any other part of the
26: program under the GPL.
27: */
28:
29: /*
30: * Usage:
31: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
32: * --filename provides the file name for the #line directive in the output,
33: * overriding input_file (if present).
34: * If no input_file is supplied, input is read from stdin.
35: * If no output_file is supplied, output goes to stdout.
36: * There are no error messages.
37: *
38: * ansi2knr recognizes function definitions by seeing a non-keyword
39: * identifier at the left margin, followed by a left parenthesis, with a
40: * right parenthesis as the last character on the line, and with a left
41: * brace as the first token on the following line (ignoring possible
42: * intervening comments and/or preprocessor directives), except that a line
43: * consisting of only
44: * identifier1(identifier2)
45: * will not be considered a function definition unless identifier2 is
46: * the word "void", and a line consisting of
47: * identifier1(identifier2, <<arbitrary>>)
48: * will not be considered a function definition.
49: * ansi2knr will recognize a multi-line header provided that no intervening
50: * line ends with a left or right brace or a semicolon. These algorithms
51: * ignore whitespace, comments, and preprocessor directives, except that
52: * the function name must be the first thing on the line. The following
53: * constructs will confuse it:
54: * - Any other construct that starts at the left margin and
55: * follows the above syntax (such as a macro or function call).
56: * - Some macros that tinker with the syntax of function headers.
57: */
58:
59: /*
60: * The original and principal author of ansi2knr is L. Peter Deutsch
61: * <ghost@aladdin.com>. Other authors are noted in the change history
62: * that follows (in reverse chronological order):
63:
64: lpd 2000-04-12 backs out Eggert's changes because of bugs:
65: - concatlits didn't declare the type of its bufend argument;
66: - concatlits didn't recognize when it was inside a comment;
67: - scanstring could scan backward past the beginning of the string;
68: - the check for \ + newline in scanstring was unnecessary.
69:
70: 2000-03-05 Paul Eggert <eggert@twinsun.com>
71:
72: Add support for concatenated string literals.
73: * ansi2knr.c (concatlits): New decl.
74: (main): Invoke concatlits to concatenate string literals.
75: (scanstring): Handle backslash-newline correctly. Work with
76: character constants. Fix bug when scanning backwards through
77: backslash-quote. Check for unterminated strings.
78: (convert1): Parse character constants, too.
79: (appendline, concatlits): New functions.
80: * ansi2knr.1: Document this.
81:
82: lpd 1999-08-17 added code to allow preprocessor directives
83: wherever comments are allowed
84: lpd 1999-04-12 added minor fixes from Pavel Roskin
85: <pavel_roskin@geocities.com> for clean compilation with
86: gcc -W -Wall
87: lpd 1999-03-22 added hack to recognize lines consisting of
88: identifier1(identifier2, xxx) as *not* being procedures
89: lpd 1999-02-03 made indentation of preprocessor commands consistent
90: lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
91: endless loop; quoted strings within an argument list
92: confused the parser
93: lpd 1999-01-24 added a check for write errors on the output,
94: suggested by Jim Meyering <meyering@ascend.com>
95: lpd 1998-11-09 added further hack to recognize identifier(void)
96: as being a procedure
97: lpd 1998-10-23 added hack to recognize lines consisting of
98: identifier1(identifier2) as *not* being procedures
99: lpd 1997-12-08 made input_file optional; only closes input and/or
100: output file if not stdin or stdout respectively; prints
101: usage message on stderr rather than stdout; adds
102: --filename switch (changes suggested by
103: <ceder@lysator.liu.se>)
104: lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
105: compilers that don't understand void, as suggested by
106: Tom Lane
107: lpd 1996-01-15 changed to require that the first non-comment token
108: on the line following a function header be a left brace,
109: to reduce sensitivity to macros, as suggested by Tom Lane
110: <tgl@sss.pgh.pa.us>
111: lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
112: undefined preprocessor symbols as 0; changed all #ifdefs
113: for configuration symbols to #ifs
114: lpd 1995-04-05 changed copyright notice to make it clear that
115: including ansi2knr in a program does not bring the entire
116: program under the GPL
117: lpd 1994-12-18 added conditionals for systems where ctype macros
118: don't handle 8-bit characters properly, suggested by
119: Francois Pinard <pinard@iro.umontreal.ca>;
120: removed --varargs switch (this is now the default)
121: lpd 1994-10-10 removed CONFIG_BROKETS conditional
122: lpd 1994-07-16 added some conditionals to help GNU `configure',
123: suggested by Francois Pinard <pinard@iro.umontreal.ca>;
124: properly erase prototype args in function parameters,
125: contributed by Jim Avera <jima@netcom.com>;
126: correct error in writeblanks (it shouldn't erase EOLs)
127: lpd 1989-xx-xx original version
128: */
129:
130: /* Most of the conditionals here are to make ansi2knr work with */
131: /* or without the GNU configure machinery. */
132:
133: #if HAVE_CONFIG_H
134: # include <config.h>
135: #endif
136:
137: #include <stdio.h>
138: #include <ctype.h>
139:
140: #if HAVE_CONFIG_H
141:
142: /*
143: For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
144: This will define HAVE_CONFIG_H and so, activate the following lines.
145: */
146:
147: # if STDC_HEADERS || HAVE_STRING_H
148: # include <string.h>
149: # else
150: # include <strings.h>
151: # endif
152:
153: #else /* not HAVE_CONFIG_H */
154:
155: /* Otherwise do it the hard way */
156:
157: # ifdef BSD
158: # include <strings.h>
159: # else
160: # ifdef VMS
161: extern int strlen(), strncmp();
162: # else
163: # include <string.h>
164: # endif
165: # endif
166:
167: #endif /* not HAVE_CONFIG_H */
168:
169: #if STDC_HEADERS
170: # include <stdlib.h>
171: #else
172: /*
173: malloc and free should be declared in stdlib.h,
174: but if you've got a K&R compiler, they probably aren't.
175: */
176: # ifdef MSDOS
177: # include <malloc.h>
178: # else
179: # ifdef VMS
180: extern char *malloc();
181: extern void free();
182: # else
183: extern char *malloc();
184: extern int free();
185: # endif
186: # endif
187:
188: #endif
189:
190: /* Define NULL (for *very* old compilers). */
191: #ifndef NULL
192: # define NULL (0)
193: #endif
194:
/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here.
 *
 * is_ascii(c) is a guard that is evaluated before the ctype test.  When it
 * expands to the constant 1 the ctype macro sees every character, so the
 * argument is converted to unsigned char first: passing a negative plain
 * char to a <ctype.h> classifier is undefined behavior.
 */
#ifdef isascii
#  undef HAVE_ISASCII		/* just in case */
#  define HAVE_ISASCII 1
#endif
#if STDC_HEADERS || !HAVE_ISASCII
#  define is_ascii(c) 1
#else
#  define is_ascii(c) isascii(c)
#endif

#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))

/* Scanning macros */
/* (Both evaluate their argument more than once: no side effects, please.) */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')

/* Forward references */
/* (Deliberately not prototypes: this file must build with K&R compilers.) */
char *ppdirforward();
char *ppdirbackward();
char *skipspace();
char *scanstring();
int writeblanks();
int test1();
int convert1();
226:
227: /* The main program */
228: int
229: main(argc, argv)
230: int argc;
231: char *argv[];
232: { FILE *in = stdin;
233: FILE *out = stdout;
234: char *filename = 0;
235: char *program_name = argv[0];
236: char *output_name = 0;
237: #define bufsize 5000 /* arbitrary size */
238: char *buf;
239: char *line;
240: char *more;
241: char *usage =
242: "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
243: /*
244: * In previous versions, ansi2knr recognized a --varargs switch.
245: * If this switch was supplied, ansi2knr would attempt to convert
246: * a ... argument to va_alist and va_dcl; if this switch was not
247: * supplied, ansi2knr would simply drop any such arguments.
248: * Now, ansi2knr always does this conversion, and we only
249: * check for this switch for backward compatibility.
250: */
251: int convert_varargs = 1;
252: int output_error;
253:
254: while ( argc > 1 && argv[1][0] == '-' ) {
255: if ( !strcmp(argv[1], "--varargs") ) {
256: convert_varargs = 1;
257: argc--;
258: argv++;
259: continue;
260: }
261: if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
262: filename = argv[2];
263: argc -= 2;
264: argv += 2;
265: continue;
266: }
267: fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
268: argv[1]);
269: fprintf(stderr, usage);
270: exit(1);
271: }
272: switch ( argc )
273: {
274: default:
275: fprintf(stderr, usage);
276: exit(0);
277: case 3:
278: output_name = argv[2];
279: out = fopen(output_name, "w");
280: if ( out == NULL ) {
281: fprintf(stderr, "%s: Cannot open output file %s\n",
282: program_name, output_name);
283: exit(1);
284: }
285: /* falls through */
286: case 2:
287: in = fopen(argv[1], "r");
288: if ( in == NULL ) {
289: fprintf(stderr, "%s: Cannot open input file %s\n",
290: program_name, argv[1]);
291: exit(1);
292: }
293: if ( filename == 0 )
294: filename = argv[1];
295: /* falls through */
296: case 1:
297: break;
298: }
299: if ( filename )
300: fprintf(out, "#line 1 \"%s\"\n", filename);
301: buf = malloc(bufsize);
302: if ( buf == NULL )
303: {
304: fprintf(stderr, "Unable to allocate read buffer!\n");
305: exit(1);
306: }
307: line = buf;
308: while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
309: {
310: test: line += strlen(line);
311: switch ( test1(buf) )
312: {
313: case 2: /* a function header */
314: convert1(buf, out, 1, convert_varargs);
315: break;
316: case 1: /* a function */
317: /* Check for a { at the start of the next line. */
318: more = ++line;
319: f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
320: goto wl;
321: if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
322: goto wl;
323: switch ( *skipspace(ppdirforward(more), 1) )
324: {
325: case '{':
326: /* Definitely a function header. */
327: convert1(buf, out, 0, convert_varargs);
328: fputs(more, out);
329: break;
330: case 0:
331: /* The next line was blank or a comment: */
332: /* keep scanning for a non-comment. */
333: line += strlen(line);
334: goto f;
335: default:
336: /* buf isn't a function header, but */
337: /* more might be. */
338: fputs(buf, out);
339: strcpy(buf, more);
340: line = buf;
341: goto test;
342: }
343: break;
344: case -1: /* maybe the start of a function */
345: if ( line != buf + (bufsize - 1) ) /* overflow check */
346: continue;
347: /* falls through */
348: default: /* not a function */
349: wl: fputs(buf, out);
350: break;
351: }
352: line = buf;
353: }
354: if ( line != buf )
355: fputs(buf, out);
356: free(buf);
357: if ( output_name ) {
358: output_error = ferror(out);
359: output_error |= fclose(out);
360: } else { /* out == stdout */
361: fflush(out);
362: output_error = ferror(out);
363: }
364: if ( output_error ) {
365: fprintf(stderr, "%s: error writing to %s\n", program_name,
366: (output_name ? output_name : "stdout"));
367: exit(1);
368: }
369: if ( in != stdin )
370: fclose(in);
371: return 0;
372: }
373:
374: /*
375: * Skip forward or backward over one or more preprocessor directives.
376: */
/*
 * Forward variant: while p points at a '#', advance to the character
 * after that line's terminator (LF, CR, or CR LF).  Returns the first
 * position that does not start a directive, or a pointer to the NUL if
 * the string ends inside a directive.
 */
char *
ppdirforward(p)
    char *p;
{
    while ( *p == '#' ) {
        /* Run to the end of the directive line. */
        while ( *p != '\r' && *p != '\n' ) {
            if ( *p == 0 )
                return p;
            ++p;
        }
        /* Treat CR LF as a single terminator. */
        if ( *p == '\r' && p[1] == '\n' )
            ++p;
        ++p;
    }
    return p;
}
/*
 * Backward variant: p points at the last character of a line (normally
 * its terminator) and limit at the start of the text.  While the line
 * ending at p begins with '#', step back to the end of the line before
 * it.  Returns the end of the last line that is not a directive; returns
 * early if a NUL is found while scanning toward the start of a line.
 * The caller must ensure the text at limit is not itself a directive.
 */
char *
ppdirbackward(p, limit)
    char *p;
    char *limit;
{
    char *cur = p;

    for ( ; ; ) {
        /* Step over the LF of a CR LF pair. */
        if ( *cur == '\n' && cur[-1] == '\r' )
            --cur;
        /* Walk back to the first character of this line. */
        while ( cur > limit && cur[-1] != '\r' && cur[-1] != '\n' ) {
            if ( cur[-1] == 0 )
                return cur;
            --cur;
        }
        if ( *cur != '#' )
            return p;
        /* A directive line: continue from the end of the previous line. */
        --cur;
        p = cur;
    }
}
407:
/*
 * Step over any run of whitespace and C comments, moving forward
 * (dir = 1) or backward (dir = -1) from p.  Returns a pointer to the
 * first character that is neither, or to a NUL met inside a comment
 * (which happens when a comment spans more than the text scanned).
 * Nothing bounds the scan except a NUL inside a comment, so the caller
 * must guarantee that a stopping character exists in that direction.
 */
char *
skipspace(p, dir)
    char *p;
    int dir;			/* 1 for forward, -1 for backward */
{
    for ( ; ; ) {
        while ( is_space(*p) )
            p += dir;
        /* Anything other than a comment delimiter ends the scan. */
        if ( *p != '/' || p[dir] != '*' )
            return p;
        p += 2 * dir;
        /* Look for the matching delimiter at the other end. */
        for ( ; *p != '*' || p[dir] != '/'; p += dir )
            if ( *p == 0 )
                return p;	/* multi-line comment?? */
        p += 2 * dir;
    }
}
432:
/*
 * Scan over a quoted string, in either direction.
 *
 * p points at one of the string's double quotes: the opening one when
 * dir > 0, the closing one otherwise.  Returns a pointer one step beyond
 * the matching quote in the direction of travel (after the closing quote
 * going forward, before the opening quote going backward).
 *
 * A quote is escaped when an odd number of backslashes immediately
 * precedes it in the text, whichever way the scan is moving, so "\\"
 * ends at its second quote and \" never ends a string.  If a NUL is
 * reached before the matching quote, a pointer to that NUL is returned
 * instead of running on through memory.  The backward scan has no other
 * lower bound; the caller must ensure an opening quote exists.
 */
char *
scanstring(p, dir)
    char *p;
    int dir;
{
    if ( dir > 0 ) {
        for ( ++p; *p != 0; ++p ) {
            if ( *p == '\\' ) {
                if ( p[1] == 0 )
                    return p + 1;	/* stray backslash at the end */
                ++p;			/* skip the escaped character */
            } else if ( *p == '"' )
                return p + 1;
        }
        return p;			/* unterminated string */
    }
    for ( --p; *p != 0; --p ) {
        char *q;
        unsigned nslash = 0;

        if ( *p != '"' )
            continue;
        /* Count the backslashes that immediately precede this quote. */
        for ( q = p; q[-1] == '\\'; --q )
            nslash++;
        if ( (nslash & 1) == 0 )
            return p - 1;
        p = q;				/* escaped quote: keep going */
    }
    return p;
}
443:
/*
 * Overwrite the characters in [start, end) with spaces, leaving any
 * carriage returns and line feeds in place so that the line structure
 * of the text is unchanged.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
    while ( start < end ) {
        if ( !(*start == '\r' || *start == '\n') )
            *start = ' ';
        ++start;
    }
    return 0;
}
458:
/*
 * Classify the text in buf, which may contain and/or end with a newline.
 * Result:
 *	 0 - definitely not a function definition;
 *	 1 - definitely a function definition;
 *	 2 - definitely a function prototype (NOT USED);
 *	-1 - may be the beginning of a function definition,
 *	     append another line and look again.
 * Function prototypes are deliberately not reported (a trailing ';'
 * yields 0) because Ghostscript's declaration-generating macros look
 * too much like prototypes, and confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{
    /* Keywords that could be followed by a left parenthesis. */
    static char *keywords[] =
      { "asm", "auto", "case", "char", "const", "double",
        "extern", "float", "for", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof",
        "static", "switch", "typedef", "unsigned",
        "void", "volatile", "while", 0
      };
    char *scan = buf;
    char *last;			/* last significant character */
    char *name_end;		/* just past the leading identifier */
    char *arg;			/* first token inside the parentheses */
    char **kw;
    unsigned name_len;
    int arg_len;
    int result;
    int depth;

    if ( !isidfirstchar(*scan) )
        return 0;		/* no name at left margin */

    /* Let the last significant character choose the tentative answer. */
    last = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
    if ( *last == '{' || *last == '}' )
        return 0;		/* not a function */
    if ( *last == ';' )
        result = 0 /*2*/;
    else if ( *last == ')' )
        result = 1;
    else
        result = -1;

    /* The name must be followed by a non-empty parenthesized list. */
    while ( isidchar(*scan) )
        scan++;
    name_end = scan;
    scan = skipspace(scan, 1);
    if ( *scan != '(' )
        return 0;		/* not a function */
    scan = skipspace(scan + 1, 1);
    if ( *scan == ')' )
        return 0;		/* no parameters */

    /* Check that the apparent function name isn't a keyword. */
    name_len = name_end - buf;
    for ( kw = keywords; *kw != 0; kw++ )
        if ( strlen(*kw) == name_len && !strncmp(*kw, buf, name_len) )
            return 0;		/* name is a keyword */

    /*
     * Check for identifier1(identifier2) and not
     * identifier1(void), or identifier1(identifier2, xxxx).
     */
    arg = scan;
    while ( isidchar(*scan) )
        scan++;
    arg_len = scan - arg;
    scan = skipspace(scan, 1);
    if ( *scan == ',' )
        return 0;		/* not a function */
    if ( *scan == ')' && (arg_len != 4 || strncmp(arg, "void", 4)) )
        return 0;		/* not a function */

    /*
     * If the last significant character was a ), we need to count
     * parentheses, because it might be part of a formal parameter
     * that is a procedure.
     */
    if ( result > 0 ) {
        depth = 0;
        scan = skipspace(buf, 1);
        while ( *scan ) {
            if ( *scan == '(' )
                depth++;
            else if ( *scan == ')' )
                depth--;
            scan = skipspace(scan + 1, 1);
        }
        if ( depth > 0 )
            result = -1;
    }
    return result;
}
552:
553: /* Convert a recognized function definition or header to K&R syntax. */
554: int
555: convert1(buf, out, header, convert_varargs)
556: char *buf;
557: FILE *out;
558: int header; /* Boolean */
559: int convert_varargs; /* Boolean */
560: { char *endfn;
561: char *p;
562: /*
563: * The breaks table contains pointers to the beginning and end
564: * of each argument.
565: */
566: char **breaks;
567: unsigned num_breaks = 2; /* for testing */
568: char **btop;
569: char **bp;
570: char **ap;
571: char *vararg = 0;
572:
573: /* Pre-ANSI implementations don't agree on whether strchr */
574: /* is called strchr or index, so we open-code it here. */
575: for ( endfn = buf; *(endfn++) != '('; )
576: ;
577: top: p = endfn;
578: breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
579: if ( breaks == NULL )
580: { /* Couldn't allocate break table, give up */
581: fprintf(stderr, "Unable to allocate break table!\n");
582: fputs(buf, out);
583: return -1;
584: }
585: btop = breaks + num_breaks * 2 - 2;
586: bp = breaks;
587: /* Parse the argument list */
588: do
589: { int level = 0;
590: char *lp = NULL;
591: char *rp = NULL;
592: char *end = NULL;
593:
594: if ( bp >= btop )
595: { /* Filled up break table. */
596: /* Allocate a bigger one and start over. */
597: free((char *)breaks);
598: num_breaks <<= 1;
599: goto top;
600: }
601: *bp++ = p;
602: /* Find the end of the argument */
603: for ( ; end == NULL; p++ )
604: { switch(*p)
605: {
606: case ',':
607: if ( !level ) end = p;
608: break;
609: case '(':
610: if ( !level ) lp = p;
611: level++;
612: break;
613: case ')':
614: if ( --level < 0 ) end = p;
615: else rp = p;
616: break;
617: case '/':
618: if (p[1] == '*')
619: p = skipspace(p, 1) - 1;
620: break;
621: case '"':
622: p = scanstring(p, 1) - 1;
623: break;
624: default:
625: ;
626: }
627: }
628: /* Erase any embedded prototype parameters. */
629: if ( lp && rp )
630: writeblanks(lp + 1, rp);
631: p--; /* back up over terminator */
632: /* Find the name being declared. */
633: /* This is complicated because of procedure and */
634: /* array modifiers. */
635: for ( ; ; )
636: { p = skipspace(p - 1, -1);
637: switch ( *p )
638: {
639: case ']': /* skip array dimension(s) */
640: case ')': /* skip procedure args OR name */
641: { int level = 1;
642: while ( level )
643: switch ( *--p )
644: {
645: case ']': case ')':
646: level++;
647: break;
648: case '[': case '(':
649: level--;
650: break;
651: case '/':
652: if (p > buf && p[-1] == '*')
653: p = skipspace(p, -1) + 1;
654: break;
655: case '"':
656: p = scanstring(p, -1) + 1;
657: break;
658: default: ;
659: }
660: }
661: if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
662: { /* We found the name being declared */
663: while ( !isidfirstchar(*p) )
664: p = skipspace(p, 1) + 1;
665: goto found;
666: }
667: break;
668: default:
669: goto found;
670: }
671: }
672: found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
673: { if ( convert_varargs )
674: { *bp++ = "va_alist";
675: vararg = p-2;
676: }
677: else
678: { p++;
679: if ( bp == breaks + 1 ) /* sole argument */
680: writeblanks(breaks[0], p);
681: else
682: writeblanks(bp[-1] - 1, p);
683: bp--;
684: }
685: }
686: else
687: { while ( isidchar(*p) ) p--;
688: *bp++ = p+1;
689: }
690: p = end;
691: }
692: while ( *p++ == ',' );
693: *bp = p;
694: /* Make a special check for 'void' arglist */
695: if ( bp == breaks+2 )
696: { p = skipspace(breaks[0], 1);
697: if ( !strncmp(p, "void", 4) )
698: { p = skipspace(p+4, 1);
699: if ( p == breaks[2] - 1 )
700: { bp = breaks; /* yup, pretend arglist is empty */
701: writeblanks(breaks[0], p + 1);
702: }
703: }
704: }
705: /* Put out the function name and left parenthesis. */
706: p = buf;
707: while ( p != endfn ) putc(*p, out), p++;
708: /* Put out the declaration. */
709: if ( header )
710: { fputs(");", out);
711: for ( p = breaks[0]; *p; p++ )
712: if ( *p == '\r' || *p == '\n' )
713: putc(*p, out);
714: }
715: else
716: { for ( ap = breaks+1; ap < bp; ap += 2 )
717: { p = *ap;
718: while ( isidchar(*p) )
719: putc(*p, out), p++;
720: if ( ap < bp - 1 )
721: fputs(", ", out);
722: }
723: fputs(") ", out);
724: /* Put out the argument declarations */
725: for ( ap = breaks+2; ap <= bp; ap += 2 )
726: (*ap)[-1] = ';';
727: if ( vararg != 0 )
728: { *vararg = 0;
729: fputs(breaks[0], out); /* any prior args */
730: fputs("va_dcl", out); /* the final arg */
731: fputs(bp[0], out);
732: }
733: else
734: fputs(breaks[0], out);
735: }
736: free((char *)breaks);
737: return 0;
738: }
CVSweb