Return to BSD News archive
Newsgroups: comp.bugs.2bsd
Path: euryale.cc.adfa.oz.au!newshost.anu.edu.au!harbinger.cc.monash.edu.au!news.mel.connect.com.au!munnari.OZ.AU!news.ecn.uoknor.edu!news.cis.okstate.edu!news.ksu.ksu.edu!news.physics.uiowa.edu!math.ohio-state.edu!howland.reston.ans.net!newsfeed.internetmci.com!in1.uu.net!news.new-york.net!wlbr!sms
From: sms@wlv.iipo.gtegsc.com (Steven M. Schultz)
Subject: Extremely annoying inefficiency in the C compiler (#289)
Sender: news@wlbr.iipo.gtegsc.com (System Administrator)
Organization: GTE Government Systems, Thousand Oaks CA USA
Message-ID: <DKqtus.LC1@wlbr.iipo.gtegsc.com>
X-Nntp-Posting-Host: wlv.iipo.gtegsc.com
Date: Sat, 6 Jan 1996 04:59:16 GMT
Lines: 221
Subject: Extremely annoying inefficiency in the C compiler (#289)
Index: lib/ccom/c00.c 2.11BSD
Description:
The C compiler generates unnecessary copies of strings into the Data
segment!
Repeat-By:
Copy the following test program to /tmp/x.c
======
#define MSG "This is a test message\n"
main()
{
write(2, MSG, sizeof(MSG));
}
======
and compile with "cc -O -S x.c".
Look at the generated code (reformatted slightly for brevity):
_main:
jsr r5,csv
mov $30,(sp)
mov $L4,-(sp)
mov $2,-(sp)
jsr pc,*$_write
cmp (sp)+,(sp)+
jmp cret
.data
L4:.byte 124,150,151,163,40,151,163,40,141,40,164,145,163,164
.byte 40,155,145,163,163,141,147,145,12,0
L5:.byte 124,150,151,163,40,151,163,40,141,40,164,145,163,164
.byte 40,155,145,163,163,141,147,145,12,0
NOTE the extra (and completely unused) data generated at "L5:". It
is an entire copy of the 'MSG' string corresponding to the
"sizeof(MSG)" in the program.
In fact, the simpler test case:
int i;
i = sizeof ("this is a test");
causes the string to be generated into the object file when only
the size is being asked for.
Fix:
The C compiler is not only counting the characters in the string
(the $30 in the generated code is the correct length of the string)
but is also emitting the string into the object file.
"sizeof string" should NOT cause a copy of the string to be
generated.
This was initially spotted when I did a "strings /bin/csh" and
noticed two instances of "longjmp botch" in the output. Upon
investigating further it was determined that the module setjmperr.c
in lib/libc/pdp/gen used the construct "write(2, MSG, sizeof(MSG));"
and that two copies of the error message were in each program that
called setjmperr().
There were a total of 4 modules in libc.a which could enter duplicate
strings into applications. I have no idea how many applications there
are which use "sizeof (string)" in them but for each use of that
form of 'sizeof' additional D space is taken up.
The fix is fairly simple. In phase 0 of the C compiler when a
STRING is about to be generated the operator stack is scanned
backwards for LPARN (left paren) and SIZEOF operators. Both of these
must be checked for in order to cover all the following cases:
sizeof "foo";
sizeof ("foo");
sizeof (("foo"));
If it is determined that the string is inside a 'sizeof' operator
then a new function is called which only counts the characters in
the string but does not generate any intermediate object code.
To install this fix cut where indicated and save to a file (/tmp/289).
Then:
patch -p0 < /tmp/289
cd /usr/src/lib/ccom
make
make install
make clean
The old version of the C compiler is saved by the "make install"
command (the phases of the compiler are backed up to /lib/oc0 and
/lib/oc1 respectively). If anything appears to be compiling wrong
you can fall back to that old version.
Thus far I've recompiled libc.a, the C compiler and 'csh' and
all is well.
You may want to wait to recompile libc.a, since there will be an update
coming out soon for libc.a. But if you wish to recompile libc.a
now:
cd /usr/src/lib/libc
make clean
make
make install
make clean
==========================cut here=======================
*** /usr/src/lib/ccom/c00.c.old Sat Jul 3 21:33:38 1993
--- /usr/src/lib/ccom/c00.c Fri Jan 5 20:12:17 1996
***************
*** 1,7 ****
/* C compiler
*
*
- *
* Called from cc:
* c0 source temp1 temp2 [ profileflag ]
* temp1 gets most of the intermediate code;
--- 1,7 ----
/* C compiler
*
+ * 2.1 (2.11BSD) 1996/01/04
*
* Called from cc:
* c0 source temp1 temp2 [ profileflag ]
* temp1 gets most of the intermediate code;
***************
*** 474,479 ****
--- 474,489 ----
strflg = 0;
}
+ cntstr()
+ {
+ register int c;
+
+ nchstr = 1;
+ while ((c = mapch('"')) >= 0) {
+ nchstr++;
+ }
+ }
+
/*
* read a single-quoted character constant.
* The routine is sensitive to the layout of
***************
*** 581,587 ****
int *op, opst[SSIZE], *pp, prst[SSIZE];
register int andflg, o;
register struct nmlist *cs;
! int p, ps, os;
char *svtree;
static struct cnode garbage = { CON, INT, (int *)NULL, (union str *)NULL, 0 };
--- 591,597 ----
int *op, opst[SSIZE], *pp, prst[SSIZE];
register int andflg, o;
register struct nmlist *cs;
! int p, ps, os, xo = 0, *xop;
char *svtree;
static struct cnode garbage = { CON, INT, (int *)NULL, (union str *)NULL, 0 };
***************
*** 634,640 ****
/* fake a static char array */
case STRING:
! putstr(cval, 0);
cs = (struct nmlist *)Tblock(sizeof(struct nmlist));
cs->hclass = STATIC;
cs->hoffset = cval;
--- 644,676 ----
/* fake a static char array */
case STRING:
! /*
! * This hack is to compensate for a bit of simplemindedness I'm not sure how
! * else to fix.
! *
! * i = sizeof ("foobar");
! *
! * or
! * i = sizeof "foobar";
! *
! * would generate ".byte 'f,'o,'o,'b,'a,'r,0" into the data segment!
! *
! * What I did here was to scan the "operator" stack looking for left parens
! * "(" preceded by a "sizeof". If both are seen and in that order or only
! * a SIZEOF is seen then the string is inside a 'sizeof' and should not
! * generate any data to the object file.
! */
! xop = op;
! while (xop > opst)
! {
! xo = *xop--;
! if (xo != LPARN)
! break;
! }
! if (xo == SIZEOF)
! cntstr();
! else
! putstr(cval, 0);
cs = (struct nmlist *)Tblock(sizeof(struct nmlist));
cs->hclass = STATIC;
cs->hoffset = cval;
*** /VERSION.old Sun Dec 31 12:11:36 1995
--- /VERSION Fri Jan 5 19:38:01 1996
***************
*** 1,4 ****
! Current Patch Level: 288
2.11 BSD
============
--- 1,4 ----
! Current Patch Level: 289
2.11 BSD
============