/* @(#) w1opt.c: 1.4 3/27/84 */ /* w1opt.c ** ** 3B20S optimizer: for one-instruction window ** ** */ /* #include "defs" -- optim.h takes care of this */ #include "optim.h" #include "optutil.h" /* D A N G E R ** ** This definition selects the highest numbered register that we ** can arbitrarily choose as a temporary in the multiply strength ** reduction that is performed below. It should equal the highest ** numbered temporary register used by the compiler (1 less than ** lowest numbered register used for register variables. */ #define TEMPREG 3 /* highest temp. reg. to use */ /* w1opt -- one-instruction optimizer ** ** This routine handles the single-instruction optimization window. ** See individual comments below about what's going on. ** In some cases (which are noted), the optimizations are ordered. */ boolean /* true if we make any changes */ w1opt(pf,pl) register NODE * pf; /* pointer to first instruction in ** window (and last) */ NODE * pl; /* pointer to last instruction in ** window (= pf) */ { register int cop = pf->op; /* single instruction's op code # */ int opn; /* temporary op code number */ char * opst; /* temporary op code string */ boolean retval = false; /* return value: in some cases ** we fall through after completing ** an optimization because it could ** lead into others. This variable ** contains the return state for the ** end. */ char * dest; /* destination string, used below */ long mult; /* multiplier, used below */ boolean f; /* random boolean flag, used below */ long templ; /* general long temporary */ /* eliminate dead code: ** ** op2 O1,R or op3 O1,O2,R ** where R is dead */ if ( isdead(dst(pf),pf) /* we don't really care what the op ** is */ && ! isbr(pf) /* but some branches set variables ** and jump: keep them */ && isdeadcc( pf ) /* this inst. may have side effects ** relied upon by cond. branch */ && isiros( pf->op1 ) && isiros( pf->op2 ) /* are operands safe for mmio */ ) { wchange(); /* Note we're changing the window */ ldelin2(pf); /* preserve line number info */ mvlivecc(pf); /* preserve condition codes line info */ DELNODE(pf); /* discard instruction */ return(true); /* announce success */ } /* ** cmpw R,&0 -> movw R,R ** ** This wins on the 3B20S, but might not on other 3B processors. */ /* Although this improvement is a small win, it messes up other ** transformations and generally confuses things. It's probably ** better to leave it turned off. */ #ifdef CMPWTOMOVW if ( cop == CMPW && isreg(pf->op1) && strcmp(pf->op2,"&0") == 0 ) { wchange(); /* note change */ chgop(pf,MOVW,"movw"); /* change the op code */ pf->op2 = pf->op1; /* both operands point at R */ makelive(pf->op2,pf); /* make register appear to be live ** hereafter so "compare" isn't thrown ** away. (Otherwise R might be dead ** and we would eliminate the inst.) */ return(true); /* made a change */ } #endif /* def CMPWTOMOV */ /* get rid of useless arithmetic ** ** addw2 &0,O -> deleted or cmpw O,&0 ** subw2 &0,O -> deleted or cmpw O,&0 ** orw2 &0,O -> deleted or cmpw O,&0 ** xorw2 &0,O -> deleted or cmpw O,&0 ** alsw2 &0,O -> deleted or cmpw O,&0 ** arsw2 &0,O -> deleted or cmpw O,&0 ** llsw2 &0,O -> deleted or cmpw O,&0 ** lrsw2 &0,O -> deleted or cmpw O,&0 ** mulw2 &1,O -> deleted or cmpw O,&0 ** umulw2 &1,O -> deleted or cmpw O,&0 ** divw2 &1,O -> deleted or cmpw O,&0 ** udivw2 &1,O -> deleted or cmpw O,&0 ** andw2 &-1,O -> deleted or cmpw O,&0 ** addw3 &0,O1,O2 -> movw O1,O2 ** subw3 &0,O1,O2 -> movw O1,O2 ** orw3 &0,O1,O2 -> movw O1,O2 ** xorw3 &0,O1,O2 -> movw O1,O2 ** alsw3 &0,O1,O2 -> movw O1,O2 ** arsw3 &0,O1,O2 -> movw O1,O2 ** llsw3 &0,O1,O2 -> movw O1,O2 ** lrsw3 &0,O1,O2 -> movw O1,O2 ** mulw3 &1,O1,O2 -> movw O1,O2 ** umulw3 &1,O1,O2 -> movw O1,O2 ** divw3 &1,O1,O2 -> movw O1,O2 ** udivw3 &1,O1,O2 -> movw O1,O2 ** andw3 &-1,O1,O2 -> movw O1,O2 ** ** mulw2 &0,O -> movw &0,O ** umulw2 &0,O -> movw &0,O ** andw2 &0,O -> movw &0,O ** mulw3 &0,O1,O2 -> movw &0,O2 ** umulw3 &0,O1,O2 -> movw &0,O2 ** andw3 &0,O1,O2 -> movw &0,O2 ** ** xorw2 &-1,O -> mcomw O,O ** xorw3 &-1,O1,O2 -> mcomw O1,O2 ** ** mulw2 &-1,O -> mnegw O,O ** divw2 &-1,O -> mnegw O,O ** mulw3 &-1,O1,O2 -> mnegw O1,O2 ** divw3 &-1,O1,O2 -> mnegw O1,O2 ** ** Note that since we've already gotten rid of dead code, we won't ** check whether O (O2) is live. However, we must be careful to ** preserve the sense of result indicators if a conditional branch ** follows some of these changes. */ /* Define types of changes we will make.... */ #define UA_NOP 1 /* no change */ #define UA_DEL 2 /* delete instruction */ #define UA_MOV 3 /* change to move */ #define UA_MOVZ 4 /* change to move zero to ... */ #define UA_MCOM 5 /* change to move complemented */ #define UA_MNEG 6 /* change to move negated */ /* We must have a literal as the first operand, and its value must fit ** in an integer. We check the latter condition by comparing the converted ** value from atol to the same value cast as an int: they must agree. ** (This is a particular problem for DMERT when they run the optimizer ** on a PDP-11/70 to optimize 3B20 code.) */ if ( isnumlit(pf->op1) && (templ = atol(pf->op1+1)) == (long)((int) templ) ) { int ultype = UA_NOP; /* initial type of change = none */ switch((int) templ) /* branch on literal */ { case 0: /* handle all instructions with &0 ** as first operand */ switch (cop) { case ADDW2: case SUBW2: case ORW2: case XORW2: case ALSW2: case ARSW2: case LLSW2: case LRSW2: if( !isiros( pf->op2 ) ) break; ultype = UA_DEL; break; /* if safe from mmio, ** delete all of these */ case ADDW3: case SUBW3: case ORW3: case XORW3: case ALSW3: case ARSW3: case LLSW3: case LRSW3: ultype = UA_MOV; /* convert to simple moves */ break; case MULW2: case UMULW2: case ANDW2: ultype = UA_MOVZ; /* convert to move zero */ break; case MULW3: case UMULW3: case ANDW3: if( !isiros( pf->op2 ) ) break; ultype = UA_MOVZ; break; /* if safe from mmio, ** convert to move zero */ } break; /* done &0 case */ case 1: /* &1 case */ switch( cop ) /* branch on op code */ { case DIVW2: case UDIVW2: case MULW2: case UMULW2: if( !isiros( pf->op2 ) ) break; ultype = UA_DEL; break; /* if safe from mmio, ** delete these */ case DIVW3: case UDIVW3: case MULW3: case UMULW3: ultype = UA_MOV; /* convert these to moves */ break; } break; /* done &1 case */ case -1: /* &-1 case */ switch ( cop ) /* branch on op code */ { case ANDW2: if( !isiros( pf->op2 ) ) break; ultype = UA_DEL; break; /* if safe from mmio, ** delete this */ case ANDW3: ultype = UA_MOV; /* change to move */ break; case XORW2: if( !isiros( pf->op2 ) ) break; ultype = UA_MCOM; break; /* if safe from mmio, ** change to move complemented */ case XORW3: ultype = UA_MCOM; /* change to move complemented */ break; case MULW2: case DIVW2: if( !isiros( pf->op2 ) ) break; ultype = UA_MNEG; break; /* if safe from mmio, ** change to move complemented */ case MULW3: case DIVW3: ultype = UA_MNEG; /* change to move negated */ break; } break; /* end &-1 case */ } /* end switch on immediate value */ /* Now do something, based on selections made above */ switch ( ultype ) { case UA_MOV: /* change instruction to move */ wchange(); /* changing window */ pf->op1 = pf->op2; /* shift operands */ pf->op2 = dst(pf); pf->op3 = NULL; /* in case we removed it */ chgop(pf,MOVW,"movw"); /* change op code */ retval = true; /* made a change */ break; case UA_MOVZ: /* change to move zero to operand */ wchange(); pf->op1 = "&0"; /* first operand is zero */ pf->op2 = dst(pf); /* second is ultimate destination */ pf->op3 = NULL; /* clean out if there was one */ chgop(pf,MOVW,"movw"); /* change op code */ retval = true; /* made a change */ break; case UA_MCOM: /* change to move complemented */ wchange(); pf->op1 = pf->op2; /* shift operands */ pf->op2 = dst(pf); pf->op3 = NULL; chgop(pf,MCOMW,"mcomw"); /* change op code */ retval = true; /* made a change */ break; case UA_MNEG: /* change to move negated */ wchange(); pf->op1 = pf->op2; /* shift operands */ pf->op2 = dst(pf); pf->op3 = NULL; chgop(pf,MNEGW,"mnegw"); /* change op code */ retval = true; /* made a change */ break; /* For this case we must be careful: if a following instruction is a ** conditional branch, it is clearly depending on the result of the ** arithmetic, so we must put in a compare against zero instead of deleting ** the instruction. */ case UA_DEL: /* delete instruction */ wchange(); /* we will make a change */ if ( ! isdeadcc(pf) ) { chgop(pf,CMPW,"cmpw"); pf->op1 = pf->op2; /* always test second operand */ pf->op2 = "&0"; /* compare to zero */ pf->op3 = NULL; /* for completeness */ retval = true; /* made a change */ } else { ldelin2(pf); /* preserve line number info */ mvlivecc(pf); /* preserve condition codes line info */ DELNODE(pf); /* not conditional; delete node */ return(true); /* say we changed something */ } break; } /* end case that decides what to do */ cop = pf->op; /* reset current op for changed inst. */ } /* end useless arithmetic removal */ /* discard useless movw's ** ** movw O,O -> deleted ** ** The movw must not be followed by a conditional jump, since we ** must leave the condition codes set. Note that this improvement ** picks up some strange code generated above, like ** ** mulw3 &1,%r0,%r0 -> movw %r0,%r0 ** */ if ( pf->op == MOVW && strcmp(pf->op1,pf->op2) == 0 && isdeadcc(pf) && isiros(pf->op1) /* safe from mmio */ ) { wchange(); /* changing the window */ ldelin2(pf); /* preserve line number info */ mvlivecc(pf); /* preserve condition codes line info */ DELNODE(pf); /* delete the movw */ return(true); } /* change triadics to dyadics if possible ** ** op3 O1,O2,O2 -> op2 O1,O2 ** */ if (istriadic(pf,&opn,&opst) && strcmp(pf->op2,pf->op3) == 0 && isiros(pf->op2) /* safe from mmio */ ) /* triadic and last two operands match */ { wchange(); /* we're making a change */ chgop(pf,opn,opst); /* change the op code */ pf->op3 = NULL; /* so we don't keep looking at 3rd ** operand */ retval = true; /* remember that we made a change, ** but don't exit, as the next ** optimization may also apply. */ cop = opn; /* changed op code */ } /* falling through either way !! */ /* change multiplies and divides to shifts if power of 2 */ /* ** udivw2 &2^n,O -> lrsw2 &n,O ** udivw3 &2^n,O1,O2 -> lrsw3 &n,O1,O2 ** mulw2 &2^n,O -> arsw2 &n,O ** mulw3 &2^n,O1,O2 -> arsw3 &n,O1,O2 ** umulw2 &2^n,O -> llsw2 &n,O ** umulw3 &2^n,O1,O2 -> llsw3 &n,O1,O2 ** ** Note that signed divide cannot safely be altered with these ** transformations! */ switch (cop) /* dispatch on type */ { int bit; /* temporary bit number if 2^n */ case UDIVW2: case UDIVW3: case MULW2: case MULW3: case UMULW2: case UMULW3: if ( (bit = getbit(pf->op1)) < 0) /* if not power of 2, done this */ break; wchange(); /* about to change window */ pf->op1 = getspace(1+2+1); /* room for &dd\0 */ (void) sprintf(pf->op1,"&%d",bit); /* write shift amount */ switch (cop) /* now change op code as needed */ { case UDIVW2: chgop(pf,LRSW2,"lrsw2"); break; case UDIVW3: chgop(pf,LRSW3,"lrsw3"); break; case MULW2: chgop(pf,ALSW2,"alsw2"); break; case MULW3: chgop(pf,ALSW3,"alsw3"); break; case UMULW2: chgop(pf,LLSW2,"llsw2"); break; case UMULW3: chgop(pf,LLSW3,"llsw3"); break; } retval = true; /* we changed something */ cop = pf->op; /* changed op code, too */ } #ifndef M32 /* applies only to 3B20S */ /* The 3B20 can do shifts and adds faster than multiplies for small ** integer multipliers where the result ends up in a register. Here ** we pick such multiplies apart, arbitrarily stopping at 10 as an ** upper bound. */ if ( ( cop == MULW2 || cop == MULW3 ) && *pf->op1 == '&' /* multiplier is small literal */ && isreg(dest = (cop == MULW2 ? pf->op2 : pf->op3)) /* remember dest.; must be reg. */ && (mult = atol(pf->op1+1)) > 2/* positive multiplier (exclude ** 0, 1, 2 */ && mult <= 10 /* arbitrary upper limit */ ) { static char regstring[] = "%rx";/* boiler-plate register name */ char * temp = NULL; /* string representing temp. used ** during "multiply" */ NODE * new; /* pointer to new instruction node */ /* The approach works like this: ** ** 1. Identify destination register and temporary register that ** we will need to use. ** 2. Determine live/dead data for new instructions. ** 3. Add new instructions after the MULW_, but holding on to the ** MULW_ as an anchor (since pf points at it). ** 4. Delete MULW_ when done. ** ** Note that the instruction sequences we create always set the ** result indicators the same as if a multiply had been done. */ /* Macro to add new instruction: ** ptr points to instruction to add after ** opn op code number of new instruction ** opst op code string of instruction ** opn1 operand 1 for new instruction ** opn2 operand 2 for new instruction ** ld live/dead data for new instruction */ #define addinst(ptr,opn,opst,opn1,opn2,ld) \ { \ last = insert(ptr); /* get new node */ \ chgop(last,opn,opst); /* put in op code number, string */ \ last->op1 = opn1; /* put in operands */ \ last->op2 = opn2; \ last->nlive = ld; /* put in live/dead info. */ \ } /* Identify temporary and destination registers. ** ** Destination is already in 'dest': the destination of the MULW_. ** 'temp' must be chosen for MULW2 or for MULW3 where the second ** operand is not already in a register. */ /* f will remember whether we needed to "create" a temporary. ** We must do so for MULW2 or for MULW3 when 2nd operand not a reg. */ if ( f = ( cop == MULW2 /* inst. is MULW2 */ || ! isreg(pf->op2) /* MULW3 2nd operand not reg. */ ) ) { int j; /* register number */ /* We're going to loop, trying to find a register to steal. ** Then we save a copy of the appropriate register string for ** the instructions we'll build. */ for ( j = '0' + TEMPREG; j >= '0'; j--) /* go down from highest */ { regstring[2] = (char) j; /* stick char in string */ if (isdead(regstring,pf)) /* if dead, we can use it */ { temp = strcpy(getspace(sizeof regstring),regstring); break; } } } /* end if that builds a temp. string */ else /* MULW3 had reg. as 2nd operand */ temp = pf->op2; /* point at it as suitable temporary */ /* 'dest' and 'temp' now point at suitable strings representing registers. ** Although we're not sure about the multiplier value, we know powers of ** 2 have already been handled. Therefore there is no danger of allocating ** a string above without actually using it. */ /* Build correct instruction sequence. */ if (temp != NULL) /* make sure we got a temporary */ { NODE * last = pf; /* remember last node in sequence */ int ld; /* current live/dead bits */ wchange(); /* tell the world we're changing */ /* Set up live/dead data. We want to replicate the information in ** the current multiply node, plus we want to set 'temp' and 'dest' ** registers live. */ makelive(dest,pf); /* set destination live */ ld = pf->nlive; /* remember data */ if (cop == MULW3) /* On MULW3 we must load the dest. */ addinst(last,MOVW,"movw",pf->op2,dest,ld); /* Now we change the live/dead information so 'temp' will appear live */ makelive(temp,pf); ld = pf->nlive; /* If we created a temp, move the current destination (multiplicand) ** there now. */ if (f) addinst(last,MOVW,"movw",dest,temp,ld); /* Now generate instruction sequences based on multiplier. */ switch ((int) mult) /* value known to be between 2, 10 */ { case 3: addinst(last,LLSW2,"llsw2","&1",dest,ld); /* *2 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *3 */ break; case 5: addinst(last,LLSW2,"llsw2","&2",dest,ld); /* *4 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *5 */ break; case 6: addinst(last,LLSW2,"llsw2","&2",dest,ld); /* *4 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *5 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *6 */ break; case 7: addinst(last,LLSW2,"llsw2","&3",dest,ld); /* *8 */ addinst(last,SUBW2,"subw2",temp,dest,ld); /* *7 */ break; case 9: addinst(last,LLSW2,"llsw2","&3",dest,ld); /* *8 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *9 */ break; case 10: addinst(last,LLSW2,"llsw2","&3",dest,ld); /* *8 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *9 */ addinst(last,ADDW2,"addw2",temp,dest,ld); /* *10 */ break; } /* Done generating instructions. Now clean up. ** First, fix up live/dead data so temp is dead after last ** instruction. Then kill old node. */ makedead(temp,last); ldelin(pf); /* preserve line number info */ DELNODE(pf); /* delete the multiply */ return(true); /* we-ve changed something */ } /* end if, testing for NULL temp */ } /* end if, testing for multiply */ #endif /* ndef M32 */ #ifndef M32 /* 3B20 only */ /* The 3B20 has special instructions to move a positive nibble to a ** register. It does less well with negative ones, since it puts ** them in a large immediate value. The following transformation ** runs faster for small negative constants. ** ** movw &-n,R -> movw &n,R ** -> mnegw R,R ** ** when 1 <= n <= 15 */ if ( cop == MOVW && isreg(pf->op2) && isnegnib(pf->op1) ) { NODE * new = insert(pf); /* make new node after pf */ wchange(); /* we're making changes */ *(pf->op1 = copyopn(pf->op1,-1,1)) = '&'; /* copy operand, offset by 1, ** overwrite - with & */ chgop(new,MNEGW,"mnegw"); /* fill in new node */ new->op1 = new->op2 = pf->op2; new->nlive = pf->nlive; /* replicate live/dead data for new ** node so it won't go away */ return(true); } #endif /* ndef M32 */ #ifndef M32 /* 3B20 only */ /* From empirical studies it appears that triadic instructions should ** not be used on the 3B20S when the destination is a register, because ** an equivalent two-instruction sequence is faster. The same applies ** in another specialized triadic case below. ** ** op3 O1,O2,R -> movw O2,R ** -> op2 O1,R ** ** if R not used in O1 ** ** ** op3 &n,R,O -> op2 &n,R ** -> movw R,O ** ** if R is dead */ if (istriadic(pf,&opn,&opst)) /* check triadic, save equiv. op code ** number and string */ { /* first case: op3 O1,O2,R */ if ( isreg(pf->op3) && ! usesreg(pf->op1,pf->op3) ) /* check R not used in O1 */ { NODE * new = insert(pf); /* add new node after pf */ wchange(); /* changing something */ chgop(new,opn,opst); /* set up new node with dyadic */ new->op1 = pf->op1; /* set O1 */ new->op2 = pf->op3; /* set R */ new->nlive = pf->nlive; /* propagate live/dead stuff */ chgop(pf,MOVW,"movw"); /* modify original node */ pf->op1 = pf->op2; /* make first operand O2 */ pf->op2 = pf->op3; /* second is R */ pf->op3 = NULL; /* clean up third one */ makelive(pf->op2,pf); /* force R live; R may be dead ** after pl if pl is followed by a ** conditional branch. We must make ** it live here. */ return(true); } else if (isnib(pf->op1) && isdead(pf->op2,pf)) /* (test for register implicit in "isdead") */ /* second case: op3 &n,R,O */ { NODE * new = insert(pf); /* add new following node */ wchange(); /* changing something */ chgop(pf,opn,opst); /* put dyadic in first node */ chgop(new,MOVW,"movw"); /* second node is MOVW */ new->op1 = pf->op2; /* set R */ new->op2 = pf->op3; /* set O */ new->nlive = pf->nlive; /* propagate live/dead in movw */ makelive(pf->op2,pf); /* make R live after op2 */ pf->op3 = NULL; /* clean out 3rd operand of original */ return(true); } } #endif /* ndef M32 */ #ifdef IMPLLSW /* For BELLMAC-32, a shift by one bit is more efficiently ** done as an add. ** ** llsw2 &1,O1 -> addw2 O1,O1 ** ** llsw3 &1,O1,O2 -> addw3 O1,O1,O2 ** */ { if( strcmp( pf->op1, "&1" ) == 0 && isiros(pf->op2) /* safe from mmio */ ) { if( pf->op == LLSW2 ) { chgop( pf, ADDW2, "addw2" ); pf->op1 = pf->op2; return( true ); } if( pf->op == LLSW3 ) { chgop( pf, ADDW3, "addw3" ); pf->op1 = pf->op2; return( true ); } } } #endif /* IMPLLSW */ return(retval); /* indicate whether anything changed */ }