[commit] r2340 - OSCogVM StackToRegisterMappingCogit as per VMMaker-oscog.42.

Previous Topic | Next Topic
 
Classic | List | Threaded
1 message Options
Reply | Threaded | Open this post in threaded view

[commit] r2340 - OSCogVM StackToRegisterMappingCogit as per VMMaker-oscog.42.

commits-3
 
Author: eliot
Date: 2011-01-01 14:51:31 -0800 (Sat, 01 Jan 2011)
New Revision: 2340

Modified:
   branches/Cog/src/vm/cogit.c
   branches/Cog/src/vm/cogit.h
Log:
OSCogVM StackToRegisterMappingCogit as per VMMaker-oscog.42.
Fix rounding bug causing underestimate of openPICSize and
resultant hard crashes.


Modified: branches/Cog/src/vm/cogit.c
===================================================================
--- branches/Cog/src/vm/cogit.c 2011-01-01 22:45:04 UTC (rev 2339)
+++ branches/Cog/src/vm/cogit.c 2011-01-01 22:51:31 UTC (rev 2340)
@@ -1,9 +1,9 @@
 /* Automatically generated by
  CCodeGenerator VMMaker-oscog.42 uuid: d3b303c1-306e-4343-b078-655fc2bfc436
    from
- SimpleStackBasedCogit VMMaker-oscog.42 uuid: d3b303c1-306e-4343-b078-655fc2bfc436
+ StackToRegisterMappingCogit VMMaker-oscog.42 uuid: d3b303c1-306e-4343-b078-655fc2bfc436
  */
-static char __buildInfo[] = "SimpleStackBasedCogit VMMaker-oscog.42 uuid: d3b303c1-306e-4343-b078-655fc2bfc436 " __DATE__ ;
+static char __buildInfo[] = "StackToRegisterMappingCogit VMMaker-oscog.42 uuid: d3b303c1-306e-4343-b078-655fc2bfc436 " __DATE__ ;
 char *__cogitBuildInfo = __buildInfo;
 
 
@@ -83,14 +83,6 @@
 
 
 typedef struct {
- AbstractInstruction *targetInstruction;
- sqInt instructionIndex;
- } BytecodeFixup;
-
-#define CogBytecodeFixup BytecodeFixup
-
-
-typedef struct {
  sqInt annotation;
  AbstractInstruction *instruction;
  } InstructionAnnotation;
@@ -107,7 +99,35 @@
 #define CogPrimitiveDescriptor PrimitiveDescriptor
 
 
+typedef struct {
+ AbstractInstruction *targetInstruction;
+ sqInt instructionIndex;
+ sqInt simStackPtr;
+ sqInt simSpillBase;
+ sqInt mergeBase;
+ sqInt optStatus;
+ } BytecodeFixup;
 
+#define CogSSBytecodeFixup BytecodeFixup
+
+
+typedef struct {
+ char type;
+ char spilled;
+ sqInt registerr;
+ sqInt offset;
+ sqInt constant;
+ sqInt bcptr;
+ } CogSimStackEntry;
+
+
+typedef struct {
+ sqInt isReceiverResultRegLive;
+ CogSimStackEntry *ssEntry;
+ } CogSSOptStatus;
+
+
+
 /*** Constants ***/
 #define AddCqR 82
 #define AddCwR 89
@@ -123,6 +143,7 @@
 #define ArithmeticShiftRightCqR 68
 #define ArithmeticShiftRightRR 69
 #define BaseHeaderSize 4
+#define BytesPerOop 4
 #define BytesPerWord 4
 #define Call 8
 #define CDQ 102
@@ -143,6 +164,7 @@
 #define CmpCwR 88
 #define CmpRdRd 95
 #define CmpRR 74
+#define ConstZero 1
 #define ConvertRRd 101
 #define CPUID 105
 #define Debug DEBUGVM
@@ -176,6 +198,8 @@
 #define FoxMFReceiver -12
 #define FoxThisContext -8
 #define FPReg -1
+#define GPRegMax -3
+#define GPRegMin -8
 #define HasBytecodePC 5
 #define HashBitsOffset 17
 #define HashMaskUnshifted 0xFFF
@@ -268,7 +292,7 @@
 #define NegateR 67
 #define Nop 7
 #define NumSendTrampolines 4
-#define NumTrampolines 38
+#define NumTrampolines 50
 #define OrCqR 85
 #define OrRR 78
 #define PopR 62
@@ -294,6 +318,10 @@
 #define SizeMask 0xFC
 #define SPReg -2
 #define SqrtRd 100
+#define SSBaseOffset 1
+#define SSConstant 2
+#define SSRegister 3
+#define SSSpill 4
 #define StackPointerIndex 2
 #define SubCqR 83
 #define SubCwR 90
@@ -348,6 +376,7 @@
 static AbstractInstruction * annotateobjRef(AbstractInstruction *abstractInstruction, sqInt anOop);
 static AbstractInstruction * annotatewith(AbstractInstruction *abstractInstruction, sqInt annotationFlag);
 static void assertSaneJumpTarget(void *jumpTarget);
+static sqInt availableRegisterOrNil(void);
 static sqInt blockCodeSize(unsigned char byteZero, unsigned char byteOne, unsigned char byteTwo, unsigned char byteThree);
 static sqInt blockDispatchTargetsForperformarg(CogMethod *cogMethod, usqInt (*binaryFunction)(sqInt mcpc, sqInt arg), sqInt arg);
 sqInt bytecodePCForstartBcpcin(sqInt mcpc, sqInt startbcpc, CogBlockMethod *cogMethod);
@@ -500,8 +529,13 @@
 static sqInt doubleExtendedDoAnythingBytecode(void);
 static sqInt duplicateTopBytecode(void);
 static BytecodeFixup * ensureFixupAt(sqInt targetIndex);
+static BytecodeFixup * ensureNonMergeFixupAt(sqInt targetIndex);
+static void ensureReceiverResultRegContainsSelf(void);
+static void ensureSpilledAtfrom(CogSimStackEntry * self_in_ensureSpilledAtfrom, sqInt baseOffset, sqInt baseRegister);
 void enterCogCodePopReceiver(void);
 void enterCogCodePopReceiverAndClassRegs(void);
+void enterCogCodePopReceiverArg0Regs(void);
+void enterCogCodePopReceiverArg1Arg0Regs(void);
 static sqInt extendedPushBytecode(void);
 static sqInt extendedStoreAndPopBytecode(void);
 static sqInt extendedStoreBytecode(void);
@@ -530,8 +564,10 @@
 static sqInt genDoubleArithmeticpreOpCheck(sqInt arithmeticOperator, AbstractInstruction *(*preOpCheckOrNil)(int rcvrReg, int argReg));
 static sqInt genDoubleComparisoninvert(AbstractInstruction *(*jumpOpcodeGenerator)(void *), sqInt invertComparison);
 static AbstractInstruction * genDoubleFailIfZeroArgRcvrarg(sqInt rcvrReg, sqInt argReg);
+static void (*genEnilopmartForandandcalled(sqInt regArg1, sqInt regArg2, sqInt regArg3, char *trampolineName))(void) ;
 static void (*genEnilopmartForandcalled(sqInt regArg1, sqInt regArg2, char *trampolineName))(void) ;
 static void (*genEnilopmartForcalled(sqInt regArg, char *trampolineName))(void) ;
+static void (*genEnterPICEnilopmartNumArgs(sqInt numArgs))(void) ;
 static sqInt genExtendedSendBytecode(void);
 static sqInt genExtendedSuperBytecode(void);
 static sqInt genExternalizePointersForPrimitiveCall(void);
@@ -589,13 +625,16 @@
 static sqInt genLongJumpIfTrue(void);
 static sqInt genLongUnconditionalBackwardJump(void);
 static sqInt genLongUnconditionalForwardJump(void);
-static sqInt genMethodAbortTrampoline(void);
+static sqInt genMarshalledSendSupernumArgs(sqInt selector, sqInt numArgs);
+static sqInt genMarshalledSendnumArgs(sqInt selector, sqInt numArgs);
+static sqInt genMethodAbortTrampolineFor(sqInt numArgs);
 static void genMulRR(AbstractInstruction * self_in_genMulRR, sqInt regSource, sqInt regDest);
 static sqInt genMustBeBooleanTrampolineForcalled(sqInt boolean, char *trampolineName);
 static sqInt genNonLocalReturnTrampoline(void);
 static sqInt genPassConstasArgument(AbstractInstruction * self_in_genPassConstasArgument, sqInt constant, sqInt zeroRelativeArgIndex);
 static sqInt genPassRegasArgument(AbstractInstruction * self_in_genPassRegasArgument, sqInt abstractRegister, sqInt zeroRelativeArgIndex);
-static sqInt genPICAbortTrampoline(void);
+static sqInt genPICAbortTrampolineFor(sqInt numArgs);
+static sqInt genPICMissTrampolineFor(sqInt numArgs);
 static sqInt genPopStackBytecode(void);
 static sqInt genPrimitiveAdd(void);
 static sqInt genPrimitiveAsFloat(void);
@@ -650,6 +689,9 @@
 static sqInt genPushReceiverBytecode(void);
 static sqInt genPushReceiverVariableBytecode(void);
 static sqInt genPushReceiverVariable(sqInt index);
+static void genPushRegisterArgs(void);
+static void genPushRegisterArgsForAbortMissNumArgs(sqInt numArgs);
+static void genPushRegisterArgsForNumArgs(sqInt numArgs);
 static sqInt genPushRemoteTempLongBytecode(void);
 static sqInt genPushTemporaryVariableBytecode(void);
 static sqInt genPushTemporaryVariable(sqInt index);
@@ -675,19 +717,24 @@
 static sqInt genSendLiteralSelector1ArgBytecode(void);
 static sqInt genSendLiteralSelector2ArgsBytecode(void);
 static sqInt genSendSupernumArgs(sqInt selector, sqInt numArgs);
+static sqInt genSendTrampolineFornumArgscalledargargargarg(void *aRoutine, sqInt numArgs, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3);
 static sqInt genSendnumArgs(sqInt selector, sqInt numArgs);
 static sqInt genSetSmallIntegerTagsIn(sqInt scratchReg);
 static sqInt genShiftAwaySmallIntegerTagsInScratchReg(sqInt scratchReg);
 static sqInt genShortJumpIfFalse(void);
 static sqInt genShortUnconditionalJump(void);
 static sqInt genSmallIntegerComparison(sqInt jumpOpcode);
+static sqInt genSpecialSelectorArithmetic(void);
 static sqInt genSpecialSelectorClass(void);
+static sqInt genSpecialSelectorComparison(void);
 static sqInt genSpecialSelectorEqualsEquals(void);
 static sqInt genSpecialSelectorSend(void);
+static sqInt genSSPushSlotreg(sqInt index, sqInt baseReg);
 static sqInt genStoreAndPopReceiverVariableBytecode(void);
 static sqInt genStoreAndPopRemoteTempLongBytecode(void);
 static sqInt genStoreAndPopTemporaryVariableBytecode(void);
 static sqInt genStoreCheckTrampoline(void);
+static sqInt genStoreImmediateInSourceRegslotIndexdestReg(sqInt sourceReg, sqInt index, sqInt destReg);
 static sqInt genStorePopLiteralVariable(sqInt popBoolean, sqInt litVarIndex);
 static sqInt genStorePopMaybeContextReceiverVariable(sqInt popBoolean, sqInt slotIndex);
 static sqInt genStorePopReceiverVariable(sqInt popBoolean, sqInt slotIndex);
@@ -699,8 +746,6 @@
 static AbstractInstruction * genSubstituteReturnAddress(AbstractInstruction * self_in_genSubstituteReturnAddress, sqInt retpc);
 static sqInt genTrampolineForcalled(void *aRoutine, char *aString);
 static sqInt genTrampolineForcalledarg(void *aRoutine, char *aString, sqInt regOrConst0);
-static sqInt genTrampolineForcalledargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1);
-static sqInt genTrampolineForcalledargargargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3);
 static sqInt genTrampolineForcalledargargargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt resultReg);
 static sqInt genTrampolineForcalledargargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt resultReg);
 static sqInt genTrampolineForcalledargresult(void *aRoutine, char *aString, sqInt regOrConst0, sqInt resultReg);
@@ -726,11 +771,14 @@
 static BytecodeFixup * initializeFixupAt(sqInt targetIndex);
 static sqInt initialMethodUsageCount(void);
 static sqInt initialOpenPICUsageCount(void);
+static void initSimStackForFramefulMethod(sqInt startpc);
+static void initSimStackForFramelessMethod(sqInt startpc);
 static sqInt inlineCacheTagAt(AbstractInstruction * self_in_inlineCacheTagAt, sqInt callSiteReturnAddress);
 static sqInt inlineCacheTagForInstance(sqInt oop);
 static sqInt inlineCacheTagIsYoung(sqInt cacheTag);
 static sqInt instructionSizeAt(AbstractInstruction * self_in_instructionSizeAt, sqInt pc);
 sqInt interpretOffset(void);
+static sqInt inverseBranchFor(sqInt opcode);
 static sqInt isAFixup(AbstractInstruction * self_in_isAFixup, void *fixupOrAddress);
 static sqInt isAnInstruction(AbstractInstruction * self_in_isAnInstruction, void *addressOrInstruction);
 static sqInt isBigEndian(AbstractInstruction * self_in_isBigEndian);
@@ -741,6 +789,7 @@
 static sqInt isPCDependent(AbstractInstruction * self_in_isPCDependent);
 static sqInt isQuick(AbstractInstruction * self_in_isQuick, unsigned long operand);
 sqInt isSendReturnPC(sqInt retpc);
+static sqInt isSmallIntegerTagNonZero(void);
 static AbstractInstruction * gJumpAboveOrEqual(void *jumpTarget);
 static AbstractInstruction * gJumpAbove(void *jumpTarget);
 static AbstractInstruction * gJumpBelow(void *jumpTarget);
@@ -756,6 +805,7 @@
 static AbstractInstruction * gJumpLong(void *jumpTarget);
 static AbstractInstruction * gJumpNegative(void *jumpTarget);
 static AbstractInstruction * gJumpNonZero(void *jumpTarget);
+static AbstractInstruction * gJumpNoOverflow(void *jumpTarget);
 static AbstractInstruction * gJumpOverflow(void *jumpTarget);
 static AbstractInstruction * JumpRT(sqInt callTarget);
 static AbstractInstruction * gJumpR(sqInt reg);
@@ -777,6 +827,7 @@
 static sqInt leafCallStackPointerDelta(AbstractInstruction * self_in_leafCallStackPointerDelta);
 void linkSendAtintocheckedreceiver(sqInt callSiteReturnAddress, CogMethod *sendingMethod, CogMethod *targetMethod, sqInt checked, sqInt receiver);
 static sqInt literalBeforeFollowingAddress(AbstractInstruction * self_in_literalBeforeFollowingAddress, sqInt followingAddress);
+static sqInt liveRegisters(void);
 static sqInt loadLiteralByteSize(AbstractInstruction * self_in_loadLiteralByteSize);
 static sqInt longBranchDistance(unsigned char byteZero, unsigned char byteOne);
 static sqInt longForwardBranchDistance(unsigned char byteZero, unsigned char byteOne);
@@ -816,11 +867,14 @@
 void markMethodAndReferents(CogBlockMethod *aCogMethod);
 static void markYoungObjectsIn(CogMethod *cogMethod);
 static sqInt markYoungObjectspcmethod(sqInt annotation, char *mcpc, sqInt cogMethod);
+static void marshallSendArguments(sqInt numArgs);
 usqInt maxCogMethodAddress(void);
 static sqInt maybeFreeCogMethodDoesntLookKosher(CogMethod *cogMethod);
 static void maybeGenerateCheckFeatures(void);
 static void maybeGenerateICacheFlush(void);
 sqInt mcPCForstartBcpcin(sqInt bcpc, sqInt startbcpc, CogBlockMethod *cogMethod);
+static void mergeAtfrom(CogSimStackEntry * self_in_mergeAtfrom, sqInt baseOffset, sqInt baseRegister);
+static void mergeafterReturn(BytecodeFixup *fixup, sqInt mergeFollowsReturn);
 static sqInt methodAbortTrampolineFor(sqInt numArgs);
 static CogMethod * methodAfter(CogMethod *cogMethod);
 CogMethod * methodFor(void *address);
@@ -828,6 +882,7 @@
 sqInt mnuOffset(void);
 static sqInt modRMRO(AbstractInstruction * self_in_modRMRO, sqInt mod, sqInt regMode, sqInt regOpcode);
 static AbstractInstruction * gNegateR(sqInt reg);
+static AbstractInstruction * gNop(void);
 static sqInt nextBytecodePCForatbyte0in(BytecodeDescriptor *descriptor, sqInt pc, sqInt opcodeByte, sqInt aMethodObj);
 static sqInt nextBytecodePCInMapAfterininBlockupTo(sqInt startbcpc, sqInt methodObject, sqInt isInBlock, sqInt endpc);
 static sqInt noCogMethodsMaximallyMarked(void);
@@ -852,6 +907,7 @@
 sqInt pcisWithinMethod(char *address, CogMethod *cogMethod);
 static sqInt picAbortTrampolineFor(sqInt numArgs);
 static void planCompaction(void);
+static void popToReg(CogSimStackEntry * self_in_popToReg, sqInt reg);
 static PrimitiveDescriptor * primitiveGeneratorOrNil(void);
 void printCogMethodFor(void *address);
 void printCogMethods(void);
@@ -866,7 +922,10 @@
 void recordCallOffsetInof(CogMethod *cogMethod, void *callLabelArg);
 static void recordGeneratedRunTimeaddress(char *aString, sqInt address);
 sqInt recordPrimTraceFunc(void);
+static sqInt registerMask(CogSimStackEntry * self_in_registerMask);
+static sqInt registerMaskFor(sqInt reg);
 static sqInt registerMaskForandand(sqInt reg1, sqInt reg2, sqInt reg3);
+static sqInt registerOrNil(CogSimStackEntry * self_in_registerOrNil);
 static void relocateAndPruneYoungReferrers(void);
 static void relocateCallBeforeReturnPCby(AbstractInstruction * self_in_relocateCallBeforeReturnPCby, sqInt retpc, sqInt delta);
 static void relocateCallsAndSelfReferencesInMethod(CogMethod *cogMethod);
@@ -906,10 +965,31 @@
 static sqInt sizePCDependentInstructionAt(AbstractInstruction * self_in_sizePCDependentInstructionAt, sqInt eventualAbsoluteAddress);
 static sqInt slotOffsetOfInstVarIndex(sqInt index);
 static sqInt spanForatbyte0in(BytecodeDescriptor *descriptor, sqInt pc, sqInt opcodeByte, sqInt aMethodObj);
+static void ssAllocateCallReg(sqInt requiredReg1);
+static void ssAllocateCallRegand(sqInt requiredReg1, sqInt requiredReg2);
+static sqInt ssAllocatePreferredReg(sqInt preferredReg);
+static void ssAllocateRequiredRegMaskupThrough(sqInt requiredRegsMask, sqInt stackPtr);
+static void ssAllocateRequiredReg(sqInt requiredReg);
+static void ssAllocateRequiredRegand(sqInt requiredReg1, sqInt requiredReg2);
+static void ssAllocateRequiredRegupThrough(sqInt requiredReg, sqInt stackPtr);
+static void ssFlushTo(sqInt index);
+static void ssFlushUpThroughReceiverVariable(sqInt slotIndex);
+static void ssFlushUpThroughTemporaryVariable(sqInt tempIndex);
+static void ssPop(sqInt n);
+static sqInt ssPushBaseoffset(sqInt reg, sqInt offset);
+static sqInt ssPushConstant(sqInt literal);
+static sqInt ssPushDesc(CogSimStackEntry simStackEntry);
+static sqInt ssPushRegister(sqInt reg);
+static void ssPush(sqInt n);
+static sqInt ssStorePoptoPreferredReg(sqInt popBoolean, sqInt preferredReg);
+static CogSimStackEntry * ssTop(void);
+static CogSimStackEntry ssTopDescriptor(void);
+static CogSimStackEntry * ssValue(sqInt n);
 static sqInt stackBytesForNumArgs(AbstractInstruction * self_in_stackBytesForNumArgs, sqInt numArgs);
 sqInt stackPageHeadroomBytes(void);
 static sqInt stackPageInterruptHeadroomBytes(AbstractInstruction * self_in_stackPageInterruptHeadroomBytes);
 static void storeLiteralbeforeFollowingAddress(AbstractInstruction * self_in_storeLiteralbeforeFollowingAddress, sqInt literal, sqInt followingAddress);
+static void storeToReg(CogSimStackEntry * self_in_storeToReg, sqInt reg);
 static sqInt sib(AbstractInstruction * self_in_sib, sqInt scale, sqInt indexReg, sqInt baseReg);
 sqInt traceLinkedSendOffset(void);
 static char * trampolineNamenumArgs(char *routinePrefix, sqInt numArgs);
@@ -956,6 +1036,7 @@
 static sqInt bytecodePointer;
 void * CFramePointer;
 void * CStackPointer;
+static sqInt callerSavedRegMask;
 sqInt ceBaseFrameReturnTrampoline;
 sqInt ceCannotResumeTrampoline;
 void (*ceCaptureCStackPointers)(void);
@@ -964,7 +1045,12 @@
 static sqInt ceClosureCopyTrampoline;
 static sqInt ceCPICMissTrampoline;
 static sqInt ceCreateNewArrayTrampoline;
+void (*ceEnter0ArgsPIC)(void);
+void (*ceEnter1ArgsPIC)(void);
+void (*ceEnter2ArgsPIC)(void);
 void (*ceEnterCogCodePopReceiverAndClassRegs)(void);
+void (*ceEnterCogCodePopReceiverArg0Regs)(void);
+void (*ceEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*ceEnterCogCodePopReceiverReg)(void);
 static sqInt ceFetchContextInstVarTrampoline;
 static void (*ceFlushICache)(unsigned long from, unsigned long to);
@@ -997,6 +1083,8 @@
 static sqInt cPICCaseSize;
 static sqInt cPICEndSize;
 static const int cStackAlignment = STACK_ALIGN_BYTES;
+static sqInt deadCode;
+static sqInt debugFixupBreaks;
 unsigned long debugPrimCallStackOffset;
 static AbstractInstruction * endCPICCase0;
 static AbstractInstruction * endCPICCase1;
@@ -1011,22 +1099,22 @@
 static sqInt firstSend;
 static BytecodeFixup * fixups;
 static BytecodeDescriptor generatorTable[256] = {
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverVariableBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -1123,7 +1211,7 @@
  { genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genStoreAndPopTemporaryVariableBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
- { genPushReceiverBytecode, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { genPushReceiverBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushConstantTrueBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushConstantFalseBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genPushConstantNilBytecode, (sqInt (*)(unsigned char,...))0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -1187,28 +1275,28 @@
  { genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
  { genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
  { genLongJumpIfFalse, (sqInt (*)(unsigned char,...))longForwardBranchDistance, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 },
+ { genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 75, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 76, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 23, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 25, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 26, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 24, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 15, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorComparison, (sqInt (*)(unsigned char,...))0, 0, 16, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 77, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ { genSpecialSelectorArithmetic, (sqInt (*)(unsigned char,...))0, 0, 78, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
- { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
  { genSpecialSelectorEqualsEquals, (sqInt (*)(unsigned char,...))0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genSpecialSelectorClass, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  { genSpecialSelectorSend, (sqInt (*)(unsigned char,...))0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
@@ -1277,10 +1365,12 @@
 static sqInt lastSend;
 static usqInt limitAddress;
 static CogBlockMethod * maxMethodBefore;
+static sqInt methodAbortTrampolines[4];
 static sqInt methodBytesFreedSinceLastCompaction;
 static AbstractInstruction *methodLabel = &aMethodLabel;
 static sqInt methodObj;
 static sqInt methodOrBlockNumArgs;
+static sqInt methodOrBlockNumTemps;
 static sqInt methodZoneBase;
 static sqInt missOffset;
 static AbstractInstruction * mnuCall;
@@ -1292,6 +1382,9 @@
 static sqInt opcodeIndex;
 static CogMethod *openPICList = 0;
 static sqInt openPICSize;
+static CogSSOptStatus optStatus;
+static sqInt picAbortTrampolines[4];
+static sqInt picMissTrampolines[4];
 static void (*postCompileHook)(CogMethod *, void *);
 static AbstractInstruction * primInvokeLabel;
 static PrimitiveDescriptor primitiveGeneratorTable[MaxCompiledPrimitiveIndex+1] = {
@@ -1521,9 +1614,16 @@
 };
 static sqInt primitiveIndex;
 void (*realCEEnterCogCodePopReceiverAndClassRegs)(void);
+void (*realCEEnterCogCodePopReceiverArg0Regs)(void);
+void (*realCEEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*realCEEnterCogCodePopReceiverReg)(void);
+static sqInt regArgsHaveBeenPushed;
 static AbstractInstruction * sendMissCall;
 static sqInt sendTrampolines[NumSendTrampolines];
+static CogSimStackEntry simSelf;
+static sqInt simSpillBase;
+static CogSimStackEntry simStack[256];
+static sqInt simStackPtr;
 static AbstractInstruction * stackCheckLabel;
 static AbstractInstruction * stackOverflowCall;
 static sqInt superSendTrampolines[NumSendTrampolines];
@@ -1576,7 +1676,7 @@
 #define noCheckEntryOffset() cmNoCheckEntryOffset
 #define noContextSwitchBlockEntryOffset() blockNoContextSwitchOffset
 #define notYetImplemented() warning("not yet implemented")
-#define numRegArgs() 0
+#define numRegArgs() 1
 #define printNum(n) printf("%ld", (long) n)
 #define printOnTrace() (traceLinkedSends & 8)
 #define print(aString) printf(aString)
@@ -1586,7 +1686,12 @@
 #define reportError(n) warning("compilation error")
 #define setCFramePointer(theFP) (CFramePointer = (void *)(theFP))
 #define setCStackPointer(theSP) (CStackPointer = (void *)(theSP))
+#define simStackAt(index) (simStack + (index))
+#define traceDescriptor(ign) 0
+#define traceFixup(ign) 0
 #define traceMapbyteatfor(ig,no,re,d) 0
+#define traceMerge(ign) 0
+#define traceSimStack() 0
 #define tryLockVMOwner() (ceTryLockVMOwner() != 0)
 #define typeEtAlWord(cm) (((long *)(cm))[1])
 #define unlockVMOwner() ceUnlockVMOwner()
@@ -1932,6 +2037,30 @@
 }
 
 static sqInt
+availableRegisterOrNil(void)
+{
+    sqInt liveRegs;
+
+ liveRegs = liveRegisters();
+ if (!(liveRegs & (registerMaskFor(Arg1Reg)))) {
+ return Arg1Reg;
+ }
+ if (!(liveRegs & (registerMaskFor(Arg0Reg)))) {
+ return Arg0Reg;
+ }
+ if (!(liveRegs & (registerMaskFor(SendNumArgsReg)))) {
+ return SendNumArgsReg;
+ }
+ if (!(liveRegs & (registerMaskFor(ClassReg)))) {
+ return ClassReg;
+ }
+ if (!(liveRegs & (registerMaskFor(ReceiverResultReg)))) {
+ return ReceiverResultReg;
+ }
+ return null;
+}
+
+static sqInt
 blockCodeSize(unsigned char byteZero, unsigned char byteOne, unsigned char byteTwo, unsigned char byteThree)
 {
  return (byteTwo * 256) + byteThree;
@@ -2997,13 +3126,33 @@
 static sqInt
 compileAbstractInstructionsFromthrough(sqInt start, sqInt end)
 {
+    sqInt debugBytecodePointers;
     BytecodeDescriptor *descriptor;
     BytecodeFixup *fixup;
     sqInt nextOpcodeIndex;
     sqInt result;
 
+ traceSimStack();
  bytecodePointer = start;
+ descriptor = null;
+ deadCode = 0;
  do {
+ ;
+ fixup = fixupAt(bytecodePointer - initialPC);
+ if ((((usqInt)((fixup->targetInstruction)))) > 0) {
+ deadCode = 0;
+ if ((((usqInt)((fixup->targetInstruction)))) >= 2) {
+ mergeafterReturn(fixup, (descriptor != null)
+ && ((descriptor->isReturn)));
+ }
+ }
+ else {
+ if ((descriptor != null)
+ && ((descriptor->isReturn))) {
+ deadCode = 1;
+ }
+ }
+ ;
  byte0 = fetchByteofObject(bytecodePointer, methodObj);
  descriptor = generatorAt(byte0);
  if (((descriptor->numBytes)) > 1) {
@@ -3019,9 +3168,16 @@
  }
  }
  nextOpcodeIndex = opcodeIndex;
- result = ((descriptor->generator))();
- fixup = fixupAt(bytecodePointer - initialPC);
- if (((fixup->targetInstruction)) != 0) {
+ result = (deadCode
+ ? (((descriptor->isMapped))
+ || (inBlock
+ && ((descriptor->isMappedInBlock)))
+ ? annotateBytecode(gNop())
+ : 0),0
+ : ((descriptor->generator))());
+ traceDescriptor(descriptor);
+ traceSimStack();
+ if ((((((usqInt)((fixup->targetInstruction)))) >= 1) && ((((usqInt)((fixup->targetInstruction)))) <= 2))) {
  if (opcodeIndex == nextOpcodeIndex) {
  gLabel();
  }
@@ -3118,12 +3274,18 @@
  sp-> Nth temp
  Avoid use of SendNumArgsReg which is the flag determining whether
  context switch is allowed on stack-overflow. */
+/* Build a frame for a block activation. See CoInterpreter
+ class>>initializeFrameIndices. Override to push the register receiver and
+ register arguments, if any, and to correctly
+ initialize the explicitly nilled/pushed temp entries (they are /not/ of
+ type constant nil). */
 
 static void
 compileBlockFrameBuild(BlockStart *blockStart)
 {
     AbstractInstruction * cascade0;
     sqInt i;
+    sqInt ign;
 
  annotateBytecode(gLabel());
  gPushR(FPReg);
@@ -3144,6 +3306,20 @@
  gCmpRR(TempReg, SPReg);
  gJumpBelow(stackOverflowCall);
  (blockStart->stackCheckLabel = annotateBytecode(gLabel()));
+ methodOrBlockNumTemps = (((blockStart->numArgs)) + ((blockStart->numCopied))) + ((blockStart->numInitialNils));
+ initSimStackForFramefulMethod((blockStart->startpc));
+ if (((blockStart->numInitialNils)) > 0) {
+ if (((blockStart->numInitialNils)) > 1) {
+ annotateobjRef(gMoveCwR(nilObject(), TempReg), nilObject());
+ for (ign = 1; ign <= ((blockStart->numInitialNils)); ign += 1) {
+ gPushR(TempReg);
+ }
+ }
+ else {
+ annotateobjRef(gPushCw(nilObject()), nilObject());
+ }
+ methodOrBlockNumTemps = ((blockStart->numArgs)) + ((blockStart->numCopied));
+ }
 }
 
 
@@ -3151,10 +3327,14 @@
  which is what is initially in ReceiverResultReg. We must annotate the
  first instruction so that findMethodForStartBcpc:inHomeMethod: can
  function. We need two annotations because the first is a fiducial. */
+/* Make sure ReceiverResultReg holds the receiver, loaded from
+ the closure, which is what is initially in ReceiverResultReg */
 
 static void
 compileBlockFramelessEntry(BlockStart *blockStart)
 {
+ methodOrBlockNumTemps = ((blockStart->numArgs)) + ((blockStart->numCopied));
+ initSimStackForFramelessMethod((blockStart->startpc));
  annotateBytecode((blockStart->entryLabel));
  annotateBytecode((blockStart->entryLabel));
  genLoadSlotsourceRegdestReg(ClosureOuterContextIndex, ReceiverResultReg, TempReg);
@@ -3195,11 +3375,14 @@
 static CogMethod *
 compileCogMethod(sqInt selector)
 {
+    sqInt debugStackPointers;
     sqInt extra;
     sqInt numBlocks;
     sqInt numBytecodes;
     sqInt result;
 
+ methodOrBlockNumTemps = tempCountOf(methodObj);
+ ;
  hasYoungReferent = (isYoung(methodObj))
  || (isYoung(selector));
  methodOrBlockNumArgs = argumentCountOf(methodObj);
@@ -3320,6 +3503,9 @@
  Ensure SendNumArgsReg is set early on (incidentally to nilObj) because
  it is the flag determining whether context switch is allowed on
  stack-overflow.  */
+/* Build a frame for a CogMethod activation. See CoInterpreter
+ class>>initializeFrameIndices. Override to push the register receiver and
+ register arguments, if any. */
 
 static void
 compileFrameBuild(void)
@@ -3328,8 +3514,13 @@
     AbstractInstruction *jumpSkip;
 
  if (!(needsFrame)) {
+ initSimStackForFramelessMethod(initialPC);
  return;
  }
+ genPushRegisterArgs();
+ if (!(needsFrame)) {
+ return;
+ }
  gPushR(FPReg);
  gMoveRR(SPReg, FPReg);
  addDependent(methodLabel, annotateMethodRef(gPushCw(((sqInt)methodLabel))));
@@ -3356,6 +3547,7 @@
  jmpTarget(jumpSkip, stackCheckLabel = gLabel());
  }
  annotateBytecode(stackCheckLabel);
+ initSimStackForFramefulMethod(initialPC);
 }
 
 
@@ -3512,12 +3704,14 @@
 
 
 /* Compile the abstract instructions for the entire method. */
+/* Compile the abstract instructions for a method. */
 
 static sqInt
 compileMethod(void)
 {
     sqInt result;
 
+ regArgsHaveBeenPushed = 0;
  compileProlog();
  compileEntry();
  if (((result = compilePrimitive())) < 0) {
@@ -3559,7 +3753,7 @@
  while (compiledBlocksCount < blockCount) {
  blockStart = blockStartAt(compiledBlocksCount);
  compileBlockEntry(blockStart);
- if (((result = compileAbstractInstructionsFromthrough((blockStart->startpc), (((blockStart->startpc)) + ((blockStart->span))) - 1))) < 0) {
+ if (((result = compileAbstractInstructionsFromthrough(((blockStart->startpc)) + ((blockStart->numInitialNils)), (((blockStart->startpc)) + ((blockStart->span))) - 1))) < 0) {
  return result;
  }
  compiledBlocksCount += 1;
@@ -3592,7 +3786,7 @@
 
 /* Compile the code for an open PIC. Perform a probe of the first-level
  method lookup cache followed by a call of ceSendFromOpenPIC: if the probe
- fails.  */
+ fails. Override to push the register args when calling ceSendFromOpenPIC: */
 
 static void
 compileOpenPICnumArgs(sqInt selector, sqInt numArgs)
@@ -3654,6 +3848,7 @@
  gCmpRR(SendNumArgsReg, TempReg);
  gJumpZero(itsAHit);
  jmpTarget(jumpSelectorMiss, gLabel());
+ genPushRegisterArgsForNumArgs(numArgs);
  genSaveStackPointers();
  genLoadCStackPointers();
  addDependent(methodLabel, annotateMethodRef(gMoveCwR(((sqInt)methodLabel), SendNumArgsReg)));
@@ -5951,7 +6146,7 @@
 static sqInt
 cPICMissTrampolineFor(sqInt numArgs)
 {
- return ceCPICMissTrampoline;
+ return picMissTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))];
 }
 
 static sqInt
@@ -6343,9 +6538,10 @@
 static sqInt
 duplicateTopBytecode(void)
 {
- gMoveMwrR(0, SPReg, TempReg);
- gPushR(TempReg);
- return 0;
+    CogSimStackEntry desc;
+
+ desc = ssTopDescriptor();
+ return ssPushDesc(desc);
 }
 
 
@@ -6360,13 +6556,93 @@
     BytecodeFixup *fixup;
 
  fixup = fixupAt(targetIndex);
+ traceFixup(fixup);
+ ;
+ if ((((usqInt)((fixup->targetInstruction)))) <= 1) {
+ (fixup->targetInstruction = ((AbstractInstruction *) 2));
+ (fixup->simStackPtr = simStackPtr);
+ }
+ else {
+ if (((fixup->simStackPtr)) <= -2) {
+ (fixup->simStackPtr = simStackPtr);
+ }
+ else {
+ assert(((fixup->simStackPtr)) == simStackPtr);
+ }
+ }
+ return fixup;
+}
+
+
+/* Make sure there's a flagged fixup at the targetIndex (pc relative to first
+ pc) in fixups.
+ Initially a fixup's target is just a flag. Later on it is replaced with a
+ proper instruction. */
+
+static BytecodeFixup *
+ensureNonMergeFixupAt(sqInt targetIndex)
+{
+    BytecodeFixup *fixup;
+
+ fixup = fixupAt(targetIndex);
  if (((fixup->targetInstruction)) == 0) {
  (fixup->targetInstruction = ((AbstractInstruction *) 1));
  }
+ ;
  return fixup;
 }
 
+static void
+ensureReceiverResultRegContainsSelf(void)
+{
+ if (needsFrame) {
+ if (!(((optStatus.isReceiverResultRegLive))
+ && (((optStatus.ssEntry)) == ((&simSelf))))) {
+ ssAllocateRequiredReg(ReceiverResultReg);
+ storeToReg((&simSelf), ReceiverResultReg);
+ }
+ (optStatus.isReceiverResultRegLive = 1);
+ (optStatus.ssEntry = (&simSelf));
+ }
+ else {
+ assert((((simSelf.type)) == SSRegister)
+ && (((simSelf.registerr)) == ReceiverResultReg));
+ assert(((optStatus.isReceiverResultRegLive))
+ && (((optStatus.ssEntry)) == ((&simSelf))));
+ }
+}
 
+static void
+ensureSpilledAtfrom(CogSimStackEntry * self_in_ensureSpilledAtfrom, sqInt baseOffset, sqInt baseRegister)
+{
+ if ((self_in_ensureSpilledAtfrom->spilled)) {
+ if (((self_in_ensureSpilledAtfrom->type)) == SSSpill) {
+ assert((((self_in_ensureSpilledAtfrom->offset)) == baseOffset)
+ && (((self_in_ensureSpilledAtfrom->registerr)) == baseRegister));
+ return;
+ }
+ }
+ assert(((self_in_ensureSpilledAtfrom->type)) != SSSpill);
+ if (((self_in_ensureSpilledAtfrom->type)) == SSConstant) {
+ annotateobjRef(gPushCw((self_in_ensureSpilledAtfrom->constant)), (self_in_ensureSpilledAtfrom->constant));
+ }
+ else {
+ if (((self_in_ensureSpilledAtfrom->type)) == SSBaseOffset) {
+ gMoveMwrR((self_in_ensureSpilledAtfrom->offset), (self_in_ensureSpilledAtfrom->registerr), TempReg);
+ gPushR(TempReg);
+ }
+ else {
+ assert(((self_in_ensureSpilledAtfrom->type)) == SSRegister);
+ gPushR((self_in_ensureSpilledAtfrom->registerr));
+ }
+ (self_in_ensureSpilledAtfrom->type) = SSSpill;
+ (self_in_ensureSpilledAtfrom->offset) = baseOffset;
+ (self_in_ensureSpilledAtfrom->registerr) = baseRegister;
+ }
+ (self_in_ensureSpilledAtfrom->spilled) = 1;
+}
+
+
 /* This is a static version of ceEnterCogCodePopReceiverReg
  for break-pointing when debugging in C. */
 /* (and this exists only to reference Debug) */
@@ -6394,6 +6670,34 @@
  realCEEnterCogCodePopReceiverAndClassRegs();
 }
 
+
+/* This is a static version of ceEnterCogCodePopReceiverArg0Regs
+ for break-pointing when debugging in C. */
+/* (and this exists only to reference Debug) */
+
+void
+enterCogCodePopReceiverArg0Regs(void)
+{
+ if (!(Debug)) {
+ error("what??");
+ }
+ realCEEnterCogCodePopReceiverArg0Regs();
+}
+
+
+/* This is a static version of ceEnterCogCodePopReceiverArg1Arg0Regs
+ for break-pointing when debugging in C. */
+/* (and this exists only to reference Debug) */
+
+void
+enterCogCodePopReceiverArg1Arg0Regs(void)
+{
+ if (!(Debug)) {
+ error("what??");
+ }
+ realCEEnterCogCodePopReceiverArg1Arg0Regs();
+}
+
 static sqInt
 extendedPushBytecode(void)
 {
@@ -6935,29 +7239,27 @@
 }
 
 
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
+/* Receiver and arg in registers.
+ Stack looks like
  return address */
 
 static sqInt
 genDoubleArithmeticpreOpCheck(sqInt arithmeticOperator, AbstractInstruction *(*preOpCheckOrNil)(int rcvrReg, int argReg))
 {
     AbstractInstruction *doOp;
-    AbstractInstruction *fail;
     AbstractInstruction *jumpFailAlloc;
     AbstractInstruction *jumpFailCheck;
     AbstractInstruction *jumpFailClass;
     AbstractInstruction *jumpSmallInt;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
+ gMoveRR(Arg0Reg, TempReg);
  genGetDoubleValueOfinto(ReceiverResultReg, DPFPReg0);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, ClassReg);
  jumpSmallInt = genJumpSmallIntegerInScratchReg(TempReg);
- genGetCompactClassIndexNonIntOfinto(ClassReg, SendNumArgsReg);
+ genGetCompactClassIndexNonIntOfinto(Arg0Reg, SendNumArgsReg);
  gCmpCqR(classFloatCompactIndex(), SendNumArgsReg);
  jumpFailClass = gJumpNonZero(0);
- genGetDoubleValueOfinto(ClassReg, DPFPReg1);
+ genGetDoubleValueOfinto(Arg0Reg, DPFPReg1);
  doOp = gLabel();
  if (preOpCheckOrNil == null) {
  null;
@@ -6968,29 +7270,30 @@
  genoperandoperand(arithmeticOperator, DPFPReg1, DPFPReg0);
  jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
  gMoveRR(SendNumArgsReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
+ assert(methodOrBlockNumArgs <= (numRegArgs()));
+ jmpTarget(jumpFailClass, gLabel());
+ if (preOpCheckOrNil == null) {
+ null;
+ }
+ else {
+ jmpTarget(jumpFailCheck, getJmpTarget(jumpFailClass));
+ }
+ genPushRegisterArgsForNumArgs(methodOrBlockNumArgs);
+ jumpFailClass = gJump(0);
  jmpTarget(jumpSmallInt, gLabel());
  genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
  gConvertRRd(ClassReg, DPFPReg1);
  gJump(doOp);
  jmpTarget(jumpFailAlloc, gLabel());
  compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
- fail = gLabel();
  jmpTarget(jumpFailClass, gLabel());
- if (preOpCheckOrNil == null) {
- null;
- }
- else {
- jmpTarget(jumpFailCheck, fail);
- }
  return 0;
 }
 
 
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
+/* Receiver and arg in registers.
+ Stack looks like
  return address */
 
 static sqInt
@@ -7001,14 +7304,13 @@
     AbstractInstruction *jumpFail;
     AbstractInstruction *jumpSmallInt;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
+ gMoveRR(Arg0Reg, TempReg);
  genGetDoubleValueOfinto(ReceiverResultReg, DPFPReg0);
- gMoveRR(TempReg, ClassReg);
  jumpSmallInt = genJumpSmallIntegerInScratchReg(TempReg);
- genGetCompactClassIndexNonIntOfinto(ClassReg, SendNumArgsReg);
+ genGetCompactClassIndexNonIntOfinto(Arg0Reg, SendNumArgsReg);
  gCmpCqR(classFloatCompactIndex(), SendNumArgsReg);
  jumpFail = gJumpNonZero(0);
- genGetDoubleValueOfinto(ClassReg, DPFPReg1);
+ genGetDoubleValueOfinto(Arg0Reg, DPFPReg1);
  if (invertComparison) {
 
  /* May need to invert for NaNs */
@@ -7023,13 +7325,12 @@
 
  jumpCond = jumpOpcodeGenerator(0);
  annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject());
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpCond, annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject()));
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpSmallInt, gLabel());
- genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
- gConvertRRd(ClassReg, DPFPReg1);
+ genConvertSmallIntegerToIntegerInScratchReg(Arg0Reg);
+ gConvertRRd(Arg0Reg, DPFPReg1);
  gJump(compare);
  jmpTarget(jumpFail, gLabel());
  return 0;
@@ -7052,6 +7353,38 @@
  then executes a return instruction to pop off the entry-point and jump to
  it.  */
 
+static void (*genEnilopmartForandandcalled(sqInt regArg1, sqInt regArg2, sqInt regArg3, char *trampolineName))(void)
+
+{
+    sqInt endAddress;
+    sqInt enilopmart;
+    sqInt size;
+
+ opcodeIndex = 0;
+ genLoadStackPointers();
+ gPopR(regArg3);
+ gPopR(regArg2);
+ gPopR(regArg1);
+ gRetN(0);
+ computeMaximumSizes();
+ size = generateInstructionsAt(methodZoneBase);
+ endAddress = outputInstructionsAt(methodZoneBase);
+ assert((methodZoneBase + size) == endAddress);
+ enilopmart = methodZoneBase;
+ methodZoneBase = alignUptoRoutineBoundary(endAddress);
+ nopsFromto(backEnd, endAddress, methodZoneBase - 1);
+ recordGeneratedRunTimeaddress(trampolineName, enilopmart);
+ return ((void (*)(void)) enilopmart);
+}
+
+
+/* An enilopmart (the reverse of a trampoline) is a piece of code that makes
+ the system-call-like transition from the C runtime into generated machine
+ code. The desired arguments and entry-point are pushed on a stackPage's
+ stack. The enilopmart pops off the values to be loaded into registers and
+ then executes a return instruction to pop off the entry-point and jump to
+ it.  */
+
 static void (*genEnilopmartForandcalled(sqInt regArg1, sqInt regArg2, char *trampolineName))(void)
 
 {
@@ -7106,6 +7439,44 @@
 }
 
 
+/* Generate special versions of the ceEnterCogCodePopReceiverAndClassRegs
+ enilopmart that also pop register args from the stack to undo the pushing
+ of register args in the abort/miss trampolines. */
+
+static void (*genEnterPICEnilopmartNumArgs(sqInt numArgs))(void)
+
+{
+    sqInt endAddress;
+    sqInt enilopmart;
+    sqInt size;
+
+ opcodeIndex = 0;
+ genLoadStackPointers();
+ gPopR(ClassReg);
+ gPopR(TempReg);
+ gPopR(SendNumArgsReg);
+ if (numArgs > 0) {
+ if (numArgs > 1) {
+ gPopR(Arg1Reg);
+ assert((numRegArgs()) == 2);
+ }
+ gPopR(Arg0Reg);
+ }
+ gPopR(ReceiverResultReg);
+ gPushR(SendNumArgsReg);
+ gJumpR(TempReg);
+ computeMaximumSizes();
+ size = generateInstructionsAt(methodZoneBase);
+ endAddress = outputInstructionsAt(methodZoneBase);
+ assert((methodZoneBase + size) == endAddress);
+ enilopmart = methodZoneBase;
+ methodZoneBase = alignUptoRoutineBoundary(endAddress);
+ nopsFromto(backEnd, endAddress, methodZoneBase - 1);
+ recordGeneratedRunTimeaddress(trampolineNamenumArgs("ceEnterPIC", numArgs), enilopmart);
+ return ((void (*)(void)) enilopmart);
+}
+
+
 /* Can use any of the first 32 literals for the selector and pass up to 7
  arguments.
  */
@@ -7122,9 +7493,13 @@
  return genSendSupernumArgs(literalofMethod(byte1 & 31, methodObj), ((usqInt) byte1) >> 5);
 }
 
+
+/* Override to push the register receiver and register arguments, if any. */
+
 static sqInt
 genExternalizePointersForPrimitiveCall(void)
 {
+ genPushRegisterArgs();
  gMoveMwrR(0, SPReg, ClassReg);
  gMoveRAw(FPReg, framePointerAddress());
  gLoadEffectiveAddressMwrR(BytesPerWord, SPReg, TempReg);
@@ -7308,6 +7683,9 @@
 /* Enilopmarts transfer control from C into machine code (backwards
  trampolines).
  */
+/* Enilopmarts transfer control from C into machine code (backwards
+ trampolines). Override to add version for generic and PIC-specific entry
+ with reg args. */
 
 static void
 generateEnilopmarts(void)
@@ -7333,6 +7711,27 @@
  cePrimReturnEnterCogCodeProfiling = methodZoneBase;
  outputInstructionsForGeneratedRuntimeAt(cePrimReturnEnterCogCodeProfiling);
  recordGeneratedRunTimeaddress("cePrimReturnEnterCogCodeProfiling", cePrimReturnEnterCogCodeProfiling);
+
+#  if Debug
+ realCEEnterCogCodePopReceiverArg0Regs = genEnilopmartForandcalled(ReceiverResultReg, Arg0Reg, "realCEEnterCogCodePopReceiverArg0Regs");
+ ceEnterCogCodePopReceiverArg0Regs = enterCogCodePopReceiverArg0Regs;
+ realCEEnterCogCodePopReceiverArg1Arg0Regs = genEnilopmartForandandcalled(ReceiverResultReg, Arg0Reg, Arg1Reg, "realCEEnterCogCodePopReceiverArg1Arg0Regs");
+ ceEnterCogCodePopReceiverArg1Arg0Regs = enterCogCodePopReceiverArg1Arg0Regs;
+
+#  else /* Debug */
+ ceEnterCogCodePopReceiverArg0Regs = genEnilopmartForandcalled(ReceiverResultReg, Arg0Reg, "ceEnterCogCodePopReceiverArg0Regs");
+ ceEnterCogCodePopReceiverArg1Arg0Regs = genEnilopmartForandandcalled(ReceiverResultReg, Arg0Reg, Arg1Reg, "ceEnterCogCodePopReceiverArg1Arg0Regs");
+
+#  endif /* Debug */
+
+ ceEnter0ArgsPIC = genEnterPICEnilopmartNumArgs(0);
+ if ((numRegArgs()) >= 1) {
+ ceEnter1ArgsPIC = genEnterPICEnilopmartNumArgs(1);
+ if ((numRegArgs()) >= 2) {
+ ceEnter1ArgsPIC = genEnterPICEnilopmartNumArgs(2);
+ assert((numRegArgs()) == 2);
+ }
+ }
 }
 
 
@@ -7487,9 +7886,17 @@
 static void
 generateMissAbortTrampolines(void)
 {
- ceMethodAbortTrampoline = genMethodAbortTrampoline();
- cePICAbortTrampoline = genPICAbortTrampoline();
- ceCPICMissTrampoline = genTrampolineForcalledargarg(ceCPICMissreceiver, "ceCPICMissTrampoline", ClassReg, ReceiverResultReg);
+    sqInt numArgs;
+
+ for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {
+ methodAbortTrampolines[numArgs] = (genMethodAbortTrampolineFor(numArgs));
+ }
+ for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {
+ picAbortTrampolines[numArgs] = (genPICAbortTrampolineFor(numArgs));
+ }
+ for (numArgs = 0; numArgs <= ((numRegArgs()) + 1); numArgs += 1) {
+ picMissTrampolines[numArgs] = (genPICMissTrampolineFor(numArgs));
+ }
  ;
 }
 
@@ -7549,6 +7956,9 @@
 }
 
 
+/* Override to generate code to push the register arg(s) for <= numRegArg
+ arity sends.
+ */
 /* Slang needs these apparently superfluous asSymbol sends. */
 
 static void
@@ -7557,13 +7967,13 @@
     sqInt numArgs;
 
  for (numArgs = 0; numArgs <= (NumSendTrampolines - 2); numArgs += 1) {
- sendTrampolines[numArgs] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSend", numArgs), ClassReg, 0, ReceiverResultReg, numArgs));
+ sendTrampolines[numArgs] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, numArgs, trampolineNamenumArgs("ceSend", numArgs), ClassReg, 0, ReceiverResultReg, numArgs));
  }
- sendTrampolines[NumSendTrampolines - 1] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSend", -1), ClassReg, 0, ReceiverResultReg, SendNumArgsReg));
+ sendTrampolines[NumSendTrampolines - 1] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, (numRegArgs()) + 1, trampolineNamenumArgs("ceSend", -1), ClassReg, 0, ReceiverResultReg, SendNumArgsReg));
  for (numArgs = 0; numArgs <= (NumSendTrampolines - 2); numArgs += 1) {
- superSendTrampolines[numArgs] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSuperSend", numArgs), ClassReg, 1, ReceiverResultReg, numArgs));
+ superSendTrampolines[numArgs] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, numArgs, trampolineNamenumArgs("ceSuperSend", numArgs), ClassReg, 1, ReceiverResultReg, numArgs));
  }
- superSendTrampolines[NumSendTrampolines - 1] = (genTrampolineForcalledargargargarg(ceSendsupertonumArgs, trampolineNamenumArgs("ceSuperSend", -1), ClassReg, 1, ReceiverResultReg, SendNumArgsReg));
+ superSendTrampolines[NumSendTrampolines - 1] = (genSendTrampolineFornumArgscalledargargargarg(ceSendsupertonumArgs, (numRegArgs()) + 1, trampolineNamenumArgs("ceSuperSend", -1), ClassReg, 1, ReceiverResultReg, SendNumArgsReg));
  firstSend = sendTrampolines[0];
  lastSend = superSendTrampolines[NumSendTrampolines - 1];
 }
@@ -7603,7 +8013,7 @@
 {
  ceTraceLinkedSendTrampoline = genSafeTrampolineForcalledarg(ceTraceLinkedSend, "ceTraceLinkedSendTrampoline", ReceiverResultReg);
  ceTraceBlockActivationTrampoline = genTrampolineForcalled(ceTraceBlockActivation, "ceTraceBlockActivationTrampoline");
- ceTraceStoreTrampoline = genSafeTrampolineForcalledargarg(ceTraceStoreOfinto, "ceTraceStoreTrampoline", ClassReg, ReceiverResultReg);
+ ceTraceStoreTrampoline = genSafeTrampolineForcalledargarg(ceTraceStoreOfinto, "ceTraceStoreTrampoline", TempReg, ReceiverResultReg);
 }
 
 
@@ -8073,6 +8483,7 @@
 static sqInt
 genJumpBackTo(sqInt targetBytecodePC)
 {
+ ssFlushTo(simStackPtr);
  gMoveAwR(stackLimitAddress(), TempReg);
  gCmpRR(TempReg, SPReg);
  gJumpAboveOrEqual(fixupAt(targetBytecodePC - initialPC));
@@ -8129,19 +8540,25 @@
  return jumpToTarget;
 }
 
-
-/* Cunning trick by LPD. If true and false are contiguous subtract the
- smaller. Correct result is either 0 or the distance between them. If
- result is not 0 or
- their distance send mustBeBoolean. */
-
 static sqInt
 genJumpIfto(sqInt boolean, sqInt targetBytecodePC)
 {
+    CogSimStackEntry *desc;
     AbstractInstruction *ok;
 
+ ssFlushTo(simStackPtr - 1);
+ desc = ssTop();
+ ssPop(1);
+ if ((((desc->type)) == SSConstant)
+ && ((((desc->constant)) == (trueObject()))
+ || (((desc->constant)) == (falseObject())))) {
+ annotateBytecode((((desc->constant)) == boolean
+ ? gJump(ensureFixupAt(targetBytecodePC - initialPC))
+ : gLabel()));
+ return 0;
+ }
+ popToReg(desc, TempReg);
  assert((objectAfter(falseObject())) == (trueObject()));
- gPopR(TempReg);
  annotateobjRef(gSubCwR(boolean, TempReg), boolean);
  gJumpZero(ensureFixupAt(targetBytecodePC - initialPC));
  gCmpCqR((boolean == (falseObject())
@@ -8172,6 +8589,7 @@
 static sqInt
 genJumpTo(sqInt targetBytecodePC)
 {
+ ssFlushTo(simStackPtr);
  gJump(ensureFixupAt(targetBytecodePC - initialPC));
  return 0;
 }
@@ -8268,7 +8686,39 @@
  return genJumpTo(targetpc);
 }
 
+static sqInt
+genMarshalledSendSupernumArgs(sqInt selector, sqInt numArgs)
+{
+ if (isYoung(selector)) {
+ hasYoungReferent = 1;
+ }
+ assert(needsFrame);
+ if (numArgs > 2) {
+ gMoveCqR(numArgs, SendNumArgsReg);
+ }
+ gMoveCwR(selector, ClassReg);
+ CallSend(superSendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
+ (optStatus.isReceiverResultRegLive = 0);
+ return ssPushRegister(ReceiverResultReg);
+}
 
+static sqInt
+genMarshalledSendnumArgs(sqInt selector, sqInt numArgs)
+{
+ if (isYoung(selector)) {
+ hasYoungReferent = 1;
+ }
+ assert(needsFrame);
+ if (numArgs > 2) {
+ gMoveCqR(numArgs, SendNumArgsReg);
+ }
+ gMoveCwR(selector, ClassReg);
+ CallSend(sendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
+ (optStatus.isReceiverResultRegLive = 0);
+ return ssPushRegister(ReceiverResultReg);
+}
+
+
 /* Generate the abort for a method. This abort performs either a call of
  ceSICMiss: to handle a single-in-line cache miss or a call of
  ceStackOverflow: to handle a
@@ -8280,7 +8730,7 @@
  miss.  */
 
 static sqInt
-genMethodAbortTrampoline(void)
+genMethodAbortTrampolineFor(sqInt numArgs)
 {
     AbstractInstruction *jumpSICMiss;
 
@@ -8289,7 +8739,10 @@
  jumpSICMiss = gJumpNonZero(0);
  compileTrampolineForcallJumpBarnumArgsargargargargsaveRegsresultReg(ceStackOverflow, 1, 1, SendNumArgsReg, null, null, null, 0, null);
  jmpTarget(jumpSICMiss, gLabel());
- return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceSICMiss, "ceMethodAbort", 1, 1, ReceiverResultReg, null, null, null, 0, null, 1);
+ genPushRegisterArgsForAbortMissNumArgs(numArgs);
+ return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceSICMiss, trampolineNamenumArgs("ceMethodAbort", (numArgs <= (numRegArgs())
+ ? numArgs
+ : -1)), 1, 1, ReceiverResultReg, null, null, null, 0, null, 1);
 }
 
 static void
@@ -8342,62 +8795,71 @@
  ClassReg. If the register is zero then this is an MNU. */
 
 static sqInt
-genPICAbortTrampoline(void)
+genPICAbortTrampolineFor(sqInt numArgs)
 {
  opcodeIndex = 0;
- return genInnerPICAbortTrampoline("cePICAbort");
+ genPushRegisterArgsForAbortMissNumArgs(numArgs);
+ return genInnerPICAbortTrampoline(trampolineNamenumArgs("cePICAbort", (numArgs <= (numRegArgs())
+ ? numArgs
+ : -1)));
 }
 
 static sqInt
+genPICMissTrampolineFor(sqInt numArgs)
+{
+    sqInt startAddress;
+
+ startAddress = methodZoneBase;
+
+ /* N.B. a closed PIC jumps to the miss routine, not calls it, so there is only one retpc on the stack. */
+
+ opcodeIndex = 0;
+ genPushRegisterArgsForNumArgs(numArgs);
+ genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(ceCPICMissreceiver, trampolineNamenumArgs("cePICMiss", (numArgs <= (numRegArgs())
+ ? numArgs
+ : -1)), 1, 2, ClassReg, ReceiverResultReg, null, null, 0, null, 1);
+ return startAddress;
+}
+
+static sqInt
 genPopStackBytecode(void)
 {
- gAddCqR(BytesPerWord, SPReg);
+ if ((ssTop()->spilled)) {
+ gAddCqR(BytesPerWord, SPReg);
+ }
+ ssPop(1);
  return 0;
 }
 
-
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
- return address */
-
 static sqInt
 genPrimitiveAdd(void)
 {
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
  genRemoveSmallIntegerTagsInScratchReg(ClassReg);
- gMoveRR(ReceiverResultReg, TempReg);
- gAddRR(ClassReg, TempReg);
+ gAddRR(ReceiverResultReg, ClassReg);
  jumpOvfl = gJumpOverflow(0);
- gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gMoveRR(ClassReg, ReceiverResultReg);
+ gRetN(0);
  jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
  return 0;
 }
 
-
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- return address */
-
 static sqInt
 genPrimitiveAsFloat(void)
 {
     AbstractInstruction *jumpFailAlloc;
 
- gMoveRR(ReceiverResultReg, ClassReg);
- genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
- gConvertRRd(ClassReg, DPFPReg0);
+ gMoveRR(ReceiverResultReg, TempReg);
+ genConvertSmallIntegerToIntegerInScratchReg(TempReg);
+ gConvertRRd(TempReg, DPFPReg0);
  jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
  gMoveRR(SendNumArgsReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord);
+ gRetN(0);
  jmpTarget(jumpFailAlloc, gLabel());
  compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
  return 0;
@@ -8406,8 +8868,8 @@
 static sqInt
 genPrimitiveAt(void)
 {
- gMoveMwrR(BytesPerWord, SPReg, Arg0Reg);
- return genInnerPrimitiveAt(BytesPerWord * 2);
+ assert((numRegArgs()) >= 1);
+ return genInnerPrimitiveAt(0);
 }
 
 static sqInt
@@ -8415,15 +8877,13 @@
 {
     AbstractInstruction *jumpNotSI;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
 
- /* Whether the SmallInteger tags are zero or non-zero, anding them together will preserve them. */
+ /* Whether the SmallInteger tags are zero or non-zero, anding them together will preserve them. */
 
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
- gAndRR(ClassReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gAndRR(Arg0Reg, ReceiverResultReg);
+ gRetN(0);
  jmpTarget(jumpNotSI, gLabel());
  return 0;
 }
@@ -8433,26 +8893,23 @@
 {
     AbstractInstruction *jumpNotSI;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
 
  /* Whether the SmallInteger tags are zero or non-zero, oring them together will preserve them. */
 
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
- gOrRR(ClassReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gOrRR(Arg0Reg, ReceiverResultReg);
+ gRetN(0);
  jmpTarget(jumpNotSI, gLabel());
  return 0;
 }
 
 
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
+/* Receiver and arg in registers.
+ Stack looks like
  return address
 
- rTemp := ArgOffset(SP)
+ rTemp := rArg0
 rClass := rTemp
  rTemp := rTemp & 1
  jz nonInt
@@ -8494,8 +8951,9 @@
     AbstractInstruction *jumpOvfl;
     AbstractInstruction *jumpTooBig;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ assert((numRegArgs()) >= 1);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
  genConvertSmallIntegerToIntegerInScratchReg(ClassReg);
  if (!(setsConditionCodesFor(lastOpcode(), JumpNegative))) {
@@ -8512,14 +8970,14 @@
  genRemoveSmallIntegerTagsInScratchReg(ReceiverResultReg);
  gLogicalShiftLeftRR(ClassReg, ReceiverResultReg);
  genAddSmallIntegerTagsTo(ReceiverResultReg);
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpNegative, gNegateR(ClassReg));
  gCmpCqR(numSmallIntegerBits(), ClassReg);
  jumpInRange = gJumpLessOrEqual(0);
  gMoveCqR(numSmallIntegerBits(), ClassReg);
  jmpTarget(jumpInRange, gArithmeticShiftRightRR(ClassReg, ReceiverResultReg));
  genSetSmallIntegerTagsIn(ReceiverResultReg);
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpNotSI, jmpTarget(jumpTooBig, jmpTarget(jumpOvfl, gLabel())));
  return 0;
 }
@@ -8529,16 +8987,14 @@
 {
     AbstractInstruction *jumpNotSI;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
 
  /* Clear one or the other tag so that xoring will preserve them. */
 
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
- genRemoveSmallIntegerTagsInScratchReg(ClassReg);
- gXorRR(ClassReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ genRemoveSmallIntegerTagsInScratchReg(Arg0Reg);
+ gXorRR(Arg0Reg, ReceiverResultReg);
+ gRetN(0);
  jmpTarget(jumpNotSI, gLabel());
  return 0;
 }
@@ -8550,6 +9006,13 @@
  block entry or the no-context-switch entry, as appropriate, and we're
  done. If not,
  invoke the interpreter primitive. */
+/* Check the argument count. Fail if wrong.
+ Get the method from the outerContext and see if it is cogged. If so, jump
+ to the
+ block entry or the no-context-switch entry, as appropriate, and we're
+ done. If not,
+ invoke the interpreter primitive.
+ Override to push the register args first. */
 
 static sqInt
 genPrimitiveClosureValue(void)
@@ -8559,6 +9022,7 @@
     void (*primitiveRoutine)();
     sqInt result;
 
+ genPushRegisterArgs();
  genLoadSlotsourceRegdestReg(ClosureNumArgsIndex, ReceiverResultReg, TempReg);
  gCmpCqR(((methodOrBlockNumArgs << 1) | 1), TempReg);
  jumpFail = gJumpNonZero(0);
@@ -8591,9 +9055,9 @@
     AbstractInstruction *jumpSameSign;
     AbstractInstruction *jumpZero;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
- gMoveRR(TempReg, Arg1Reg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
+ gMoveRR(Arg0Reg, Arg1Reg);
 
  /* We must shift away the tags, not just subtract them, so that the
  overflow case doesn't actually overflow the machine instruction. */
@@ -8621,8 +9085,7 @@
  jmpTarget(jumpSameSign, convert = gLabel());
  genConvertIntegerToSmallIntegerInScratchReg(TempReg);
  gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpExact, gCmpCqR(1 << ((numSmallIntegerBits()) - 1), TempReg));
  gJumpLess(convert);
  jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()));
@@ -8637,8 +9100,8 @@
     AbstractInstruction *jumpOverflow;
     AbstractInstruction *jumpZero;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
 
  /* We must shift away the tags, not just subtract them, so that the
  overflow case doesn't actually overflow the machine instruction. */
@@ -8658,8 +9121,7 @@
  jumpOverflow = gJumpGreaterOrEqual(0);
  genConvertIntegerToSmallIntegerInScratchReg(TempReg);
  gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpOverflow, jmpTarget(jumpInexact, jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()))));
  return 0;
 }
@@ -8671,9 +9133,8 @@
 }
 
 
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
+/* Receiver and arg in registers.
+ Stack looks like
  return address */
 
 static sqInt
@@ -8681,14 +9142,12 @@
 {
     AbstractInstruction *jumpFalse;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gCmpRR(TempReg, ReceiverResultReg);
+ gCmpRR(Arg0Reg, ReceiverResultReg);
  jumpFalse = gJumpNonZero(0);
  annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject());
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpFalse, annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject()));
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  return 0;
 }
 
@@ -8746,11 +9205,6 @@
  return genDoubleComparisoninvert(gJumpFPNotEqual, 0);
 }
 
-
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- return address */
-
 static sqInt
 genPrimitiveFloatSquareRoot(void)
 {
@@ -8760,8 +9214,7 @@
  gSqrtRd(DPFPReg0);
  jumpFailAlloc = genAllocFloatValueintoscratchRegscratchReg(DPFPReg0, SendNumArgsReg, ClassReg, TempReg);
  gMoveRR(SendNumArgsReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord);
+ gRetN(0);
  jmpTarget(jumpFailAlloc, gLabel());
  compileInterpreterPrimitive(functionPointerForCompiledMethodprimitiveIndex(methodObj, primitiveIndex));
  return 0;
@@ -8794,8 +9247,7 @@
  jumpSI = genJumpSmallIntegerInScratchReg(ClassReg);
  genGetHashFieldNonIntOfasSmallIntegerInto(ReceiverResultReg, TempReg);
  gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord);
+ gRetN(0);
  jmpTarget(jumpSI, gLabel());
  return 0;
 }
@@ -8820,8 +9272,8 @@
     AbstractInstruction *jumpSameSign;
     AbstractInstruction *jumpZero;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
  genRemoveSmallIntegerTagsInScratchReg(ClassReg);
  jumpZero = gJumpZero(0);
@@ -8844,8 +9296,7 @@
  jmpTarget(jumpSameSign, jmpTarget(jumpExact, gLabel()));
  genSetSmallIntegerTagsIn(ClassReg);
  gMoveRR(ClassReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel()));
  return 0;
 }
@@ -8856,18 +9307,17 @@
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
+ gMoveRR(ReceiverResultReg, Arg1Reg);
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
  genShiftAwaySmallIntegerTagsInScratchReg(ClassReg);
- gMoveRR(ReceiverResultReg, TempReg);
- genRemoveSmallIntegerTagsInScratchReg(TempReg);
- gMulRR(TempReg, ClassReg);
+ genRemoveSmallIntegerTagsInScratchReg(Arg1Reg);
+ gMulRR(Arg1Reg, ClassReg);
  jumpOvfl = gJumpOverflow(0);
  genSetSmallIntegerTagsIn(ClassReg);
  gMoveRR(ClassReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
  return 0;
 }
@@ -8885,8 +9335,8 @@
     AbstractInstruction *jumpOverflow;
     AbstractInstruction *jumpZero;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
+ gMoveRR(Arg0Reg, ClassReg);
 
  /* We must shift away the tags, not just subtract them, so that the
  overflow case doesn't actually overflow the machine instruction. */
@@ -8904,8 +9354,7 @@
  jumpOverflow = gJumpGreaterOrEqual(0);
  genConvertIntegerToSmallIntegerInScratchReg(TempReg);
  gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpOverflow, jmpTarget(jumpZero, jmpTarget(jumpNotSI, gLabel())));
  return 0;
 }
@@ -8913,38 +9362,30 @@
 static sqInt
 genPrimitiveSize(void)
 {
- return genInnerPrimitiveSize(BytesPerWord);
+ return genInnerPrimitiveSize(0);
 }
 
 static sqInt
 genPrimitiveStringAt(void)
 {
- gMoveMwrR(BytesPerWord, SPReg, Arg0Reg);
- return genInnerPrimitiveStringAt(BytesPerWord * 2);
+ assert((numRegArgs()) >= 1);
+ return genInnerPrimitiveStringAt(0);
 }
 
-
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
- return address */
-
 static sqInt
 genPrimitiveSubtract(void)
 {
     AbstractInstruction *jumpNotSI;
     AbstractInstruction *jumpOvfl;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
  jumpNotSI = genJumpNotSmallIntegerInScratchReg(TempReg);
  gMoveRR(ReceiverResultReg, TempReg);
- gSubRR(ClassReg, TempReg);
+ gSubRR(Arg0Reg, TempReg);
  jumpOvfl = gJumpOverflow(0);
  genAddSmallIntegerTagsTo(TempReg);
  gMoveRR(TempReg, ReceiverResultReg);
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpOvfl, jmpTarget(jumpNotSI, gLabel()));
  return 0;
 }
@@ -9018,9 +9459,10 @@
 genPushActiveContextBytecode(void)
 {
  assert(needsFrame);
+ (optStatus.isReceiverResultRegLive = 0);
+ ssAllocateCallReg(ReceiverResultReg);
  CallRT(cePushActiveContextTrampoline);
- gPushR(ReceiverResultReg);
- return 0;
+ return ssPushRegister(ReceiverResultReg);
 }
 
 
@@ -9054,18 +9496,18 @@
 
  assert(needsFrame);
  addBlockStartAtnumArgsnumCopiedspan(bytecodePointer + 4, byte1 & 15, numCopied = ((usqInt) byte1) >> 4, (byte2 << 8) + byte3);
+ if (numCopied > 0) {
+ ssFlushTo(simStackPtr);
+ }
+ (optStatus.isReceiverResultRegLive = 0);
+ ssAllocateCallRegand(SendNumArgsReg, ReceiverResultReg);
  gMoveCqR(byte1 | ((bytecodePointer + 5) << 8), SendNumArgsReg);
  CallRT(ceClosureCopyTrampoline);
  if (numCopied > 0) {
- if (numCopied > 1) {
- gAddCqR((numCopied - 1) * BytesPerWord, SPReg);
- }
- gMoveRMwr(ReceiverResultReg, 0, SPReg);
+ gAddCqR(numCopied * BytesPerWord, SPReg);
+ ssPop(numCopied);
  }
- else {
- gPushR(ReceiverResultReg);
- }
- return 0;
+ return ssPushRegister(ReceiverResultReg);
 }
 
 static sqInt
@@ -9114,22 +9556,25 @@
 genPushLiteralVariable(sqInt literalIndex)
 {
     sqInt association;
+    sqInt freeReg;
 
+ freeReg = ssAllocatePreferredReg(ClassReg);
 
  /* N.B. Do _not_ use ReceiverResultReg to avoid overwriting receiver in assignment in frameless methods. */
+ /* So far descriptors are not rich enough to describe the entire dereference so generate the register
+ load but don't push the result.  There is an order-of-evaluation issue if we defer the dereference. */
 
  association = literalofMethod(literalIndex, methodObj);
- annotateobjRef(gMoveCwR(association, ClassReg), association);
- genLoadSlotsourceRegdestReg(ValueIndex, ClassReg, TempReg);
- gPushR(TempReg);
+ annotateobjRef(gMoveCwR(association, TempReg), association);
+ genLoadSlotsourceRegdestReg(ValueIndex, TempReg, freeReg);
+ ssPushRegister(freeReg);
  return 0;
 }
 
 static sqInt
 genPushLiteral(sqInt literal)
 {
- annotateobjRef(gPushCw(literal), literal);
- return 0;
+ return ssPushConstant(literal);
 }
 
 static sqInt
@@ -9139,14 +9584,16 @@
     AbstractInstruction *jmpSingle;
 
  assert(needsFrame);
+ ssAllocateCallRegand(ReceiverResultReg, SendNumArgsReg);
+ ensureReceiverResultRegContainsSelf();
+ if ((registerMaskFor(ReceiverResultReg)) & callerSavedRegMask) {
+ (optStatus.isReceiverResultRegLive = 0);
+ }
  if (slotIndex == InstructionPointerIndex) {
- gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
  gMoveCqR(slotIndex, SendNumArgsReg);
  CallRT(ceFetchContextInstVarTrampoline);
- gPushR(SendNumArgsReg);
- return 0;
+ return ssPushRegister(SendNumArgsReg);
  }
- gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
  genLoadSlotsourceRegdestReg(SenderIndex, ReceiverResultReg, TempReg);
  jmpSingle = genJumpNotSmallIntegerInScratchReg(TempReg);
  gMoveCqR(slotIndex, SendNumArgsReg);
@@ -9154,8 +9601,8 @@
  jmpDone = gJump(0);
  jmpTarget(jmpSingle, gLabel());
  genLoadSlotsourceRegdestReg(slotIndex, ReceiverResultReg, SendNumArgsReg);
- jmpTarget(jmpDone, gPushR(SendNumArgsReg));
- return 0;
+ jmpTarget(jmpDone, gLabel());
+ return ssPushRegister(SendNumArgsReg);
 }
 
 static sqInt
@@ -9166,7 +9613,13 @@
     sqInt size;
 
  assert(needsFrame);
- popValues = byte1 > 127;
+ (optStatus.isReceiverResultRegLive = 0);
+ if ((popValues = byte1 > 127)) {
+ ssFlushTo(simStackPtr);
+ }
+ else {
+ ssAllocateCallRegand(SendNumArgsReg, ReceiverResultReg);
+ }
  size = byte1 & 127;
  gMoveCqR(size, SendNumArgsReg);
  CallRT(ceCreateNewArrayTrampoline);
@@ -9175,9 +9628,9 @@
  gPopR(TempReg);
  genStoreSourceRegslotIndexintoNewObjectInDestReg(TempReg, i, ReceiverResultReg);
  }
+ ssPop(size);
  }
- gPushR(ReceiverResultReg);
- return 0;
+ return ssPushRegister(ReceiverResultReg);
 }
 
 static sqInt
@@ -9189,14 +9642,7 @@
 static sqInt
 genPushReceiverBytecode(void)
 {
- if (needsFrame) {
- gMoveMwrR(FoxMFReceiver, FPReg, TempReg);
- gPushR(TempReg);
- }
- else {
- gPushR(ReceiverResultReg);
- }
- return 0;
+ return ssPushDesc(simSelf);
 }
 
 static sqInt
@@ -9208,26 +9654,113 @@
 static sqInt
 genPushReceiverVariable(sqInt index)
 {
-    sqInt maybeErr;
+ ensureReceiverResultRegContainsSelf();
+ return genSSPushSlotreg(index, ReceiverResultReg);
+}
 
- if (needsFrame) {
- gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+
+/* Ensure that the register args are pushed before the retpc for methods with
+ arity <= self numRegArgs.
+ */
+/* This won't be as clumsy on a RISC. But putting the receiver and
+ args above the return address means the CoInterpreter has a
+ single machine-code frame format which saves us a lot of work. */
+
+static void
+genPushRegisterArgs(void)
+{
+ if (!(regArgsHaveBeenPushed
+ || (methodOrBlockNumArgs > (numRegArgs())))) {
+ genPushRegisterArgsForNumArgs(methodOrBlockNumArgs);
+ regArgsHaveBeenPushed = 1;
  }
- maybeErr = genLoadSlotsourceRegdestReg(index, ReceiverResultReg, TempReg);
- if (maybeErr < 0) {
- return maybeErr;
+}
+
+
+/* Ensure that the register args are pushed before the outer and
+ inner retpcs at an entry miss for arity <= self numRegArgs. The
+ outer retpc is that of a call at a send site. The inner is the call
+ from a method or PIC abort/miss to the trampoline. */
+/* This won't be as clumsy on a RISC. But putting the receiver and
+ args above the return address means the CoInterpreter has a
+ single machine-code frame format which saves us a lot of work. */
+/* Iff there are register args convert
+ base -> outerRetpc (send site retpc)
+ sp -> innerRetpc (PIC abort/miss retpc)
+ to
+ base -> receiver
+ (arg0)
+ (arg1)
+ outerRetpc
+ sp -> innerRetpc (PIC abort/miss retpc) */
+
+static void
+genPushRegisterArgsForAbortMissNumArgs(sqInt numArgs)
+{
+ if (numArgs <= (numRegArgs())) {
+ assert((numRegArgs()) <= 2);
+ if (numArgs == 0) {
+ gMoveMwrR(0, SPReg, TempReg);
+ gPushR(TempReg);
+ gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+ gMoveRMwr(TempReg, BytesPerWord, SPReg);
+ gMoveRMwr(ReceiverResultReg, 2 * BytesPerWord, SPReg);
+ return;
+ }
+ if (numArgs == 1) {
+ gMoveMwrR(BytesPerWord, SPReg, TempReg);
+ gPushR(TempReg);
+ gMoveMwrR(BytesPerWord, SPReg, TempReg);
+ gPushR(TempReg);
+ gMoveRMwr(ReceiverResultReg, 3 * BytesPerWord, SPReg);
+ gMoveRMwr(Arg0Reg, 2 * BytesPerWord, SPReg);
+ return;
+ }
+ if (numArgs == 2) {
+ gPushR(Arg1Reg);
+ gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+ gPushR(TempReg);
+ gMoveMwrR(BytesPerWord * 2, SPReg, TempReg);
+ gPushR(TempReg);
+ gMoveRMwr(ReceiverResultReg, 4 * BytesPerWord, SPReg);
+ gMoveRMwr(Arg0Reg, 3 * BytesPerWord, SPReg);
+ return;
+ }
  }
- gPushR(TempReg);
- return 0;
 }
 
+
+/* Ensure that the register args are pushed before the retpc for arity <=
+ self numRegArgs.
+ */
+/* This won't be as clumsy on a RISC. But putting the receiver and
+ args above the return address means the CoInterpreter has a
+ single machine-code frame format which saves us a lot of work. */
+
+static void
+genPushRegisterArgsForNumArgs(sqInt numArgs)
+{
+ if (numArgs <= (numRegArgs())) {
+ gMoveMwrR(0, SPReg, TempReg);
+ gMoveRMwr(ReceiverResultReg, 0, SPReg);
+ assert((numRegArgs()) <= 2);
+ if (numArgs > 0) {
+ gPushR(Arg0Reg);
+ if (numArgs > 1) {
+ gPushR(Arg1Reg);
+ }
+ }
+ gPushR(TempReg);
+ }
+}
+
 static sqInt
 genPushRemoteTempLongBytecode(void)
 {
+ ssAllocateRequiredRegand(ClassReg, SendNumArgsReg);
  gMoveMwrR(frameOffsetOfTemporary(byte2), FPReg, ClassReg);
- genLoadSlotsourceRegdestReg(byte1, ClassReg, TempReg);
- gPushR(TempReg);
- return 0;
+ genLoadSlotsourceRegdestReg(byte1, ClassReg, SendNumArgsReg);
+ return ssPushRegister(SendNumArgsReg);
 }
 
 static sqInt
@@ -9239,9 +9772,7 @@
 static sqInt
 genPushTemporaryVariable(sqInt index)
 {
- gMoveMwrR(frameOffsetOfTemporary(index), FPReg, TempReg);
- gPushR(TempReg);
- return 0;
+ return ssPushDesc(simStack[index]);
 }
 
 
@@ -9388,8 +9919,8 @@
 genReturnTopFromBlock(void)
 {
  assert(inBlock);
- flag("currently caller pushes result");
- gPopR(ReceiverResultReg);
+ popToReg(ssTop(), ReceiverResultReg);
+ ssPop(1);
  if (needsFrame) {
  gMoveRR(FPReg, SPReg);
  gPopR(FPReg);
@@ -9398,16 +9929,11 @@
  return 0;
 }
 
-
-/* Return pops receiver and arguments off the stack. Callee pushes the
- result.
- */
-
 static sqInt
 genReturnTopFromMethod(void)
 {
- flag("currently caller pushes result");
- gPopR(ReceiverResultReg);
+ popToReg(ssTop(), ReceiverResultReg);
+ ssPop(1);
  return genUpArrowReturn();
 }
 
@@ -9490,37 +10016,32 @@
 static sqInt
 genSendSupernumArgs(sqInt selector, sqInt numArgs)
 {
- assert(needsFrame);
- if (isYoung(selector)) {
- hasYoungReferent = 1;
- }
- gMoveMwrR(numArgs * BytesPerWord, SPReg, ReceiverResultReg);
- if (numArgs > 2) {
- gMoveCqR(numArgs, SendNumArgsReg);
- }
- gMoveCwR(selector, ClassReg);
- CallSend(superSendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
- flag("currently caller pushes result");
- gPushR(ReceiverResultReg);
- return 0;
+ marshallSendArguments(numArgs);
+ return genMarshalledSendSupernumArgs(selector, numArgs);
 }
 
+
+/* Generate a trampoline with four arguments.
+ Hack: a negative value indicates an abstract register, a non-negative
+ value indicates a constant. */
+
 static sqInt
+genSendTrampolineFornumArgscalledargargargarg(void *aRoutine, sqInt numArgs, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3)
+{
+    sqInt startAddress;
+
+ startAddress = methodZoneBase;
+ opcodeIndex = 0;
+ genPushRegisterArgsForNumArgs(numArgs);
+ genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 4, regOrConst0, regOrConst1, regOrConst2, regOrConst3, 0, null, 1);
+ return startAddress;
+}
+
+static sqInt
 genSendnumArgs(sqInt selector, sqInt numArgs)
 {
- if (isYoung(selector)) {
- hasYoungReferent = 1;
- }
- assert(needsFrame);
- gMoveMwrR(numArgs * BytesPerWord, SPReg, ReceiverResultReg);
- if (numArgs > 2) {
- gMoveCqR(numArgs, SendNumArgsReg);
- }
- gMoveCwR(selector, ClassReg);
- CallSend(sendTrampolines[((numArgs < (NumSendTrampolines - 1)) ? numArgs : (NumSendTrampolines - 1))]);
- flag("currently caller pushes result");
- gPushR(ReceiverResultReg);
- return 0;
+ marshallSendArguments(numArgs);
+ return genMarshalledSendnumArgs(selector, numArgs);
 }
 
 static sqInt
@@ -9552,55 +10073,376 @@
  return genJumpTo(target);
 }
 
-
-/* Stack looks like
- receiver (also in ResultReceiverReg)
- arg
- return address */
-
 static sqInt
 genSmallIntegerComparison(sqInt jumpOpcode)
 {
     AbstractInstruction *jumpFail;
     AbstractInstruction *jumpTrue;
 
- gMoveMwrR(BytesPerWord, SPReg, TempReg);
- gMoveRR(TempReg, ClassReg);
+ gMoveRR(Arg0Reg, TempReg);
  jumpFail = genJumpNotSmallIntegerInScratchReg(TempReg);
- gCmpRR(ClassReg, ReceiverResultReg);
+ gCmpRR(Arg0Reg, ReceiverResultReg);
  jumpTrue = gen(jumpOpcode);
  annotateobjRef(gMoveCwR(falseObject(), ReceiverResultReg), falseObject());
- flag("currently caller pushes result");
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpTrue, annotateobjRef(gMoveCwR(trueObject(), ReceiverResultReg), trueObject()));
- gRetN(BytesPerWord * 2);
+ gRetN(0);
  jmpTarget(jumpFail, gLabel());
  return 0;
 }
 
 static sqInt
+genSpecialSelectorArithmetic(void)
+{
+    sqInt argInt;
+    sqInt argIsInt;
+    AbstractInstruction *jumpContinue;
+    AbstractInstruction *jumpNotSmallInts;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrInt;
+    sqInt rcvrIsInt;
+    sqInt result;
+
+ primDescriptor = generatorAt(byte0);
+ argIsInt = (((ssTop()->type)) == SSConstant)
+ && ((((argInt = (ssTop()->constant))) & 1));
+ rcvrIsInt = (((ssValue(1)->type)) == SSConstant)
+ && ((((rcvrInt = (ssValue(1)->constant))) & 1));
+ if (argIsInt
+ && (rcvrIsInt)) {
+ rcvrInt = (rcvrInt >> 1);
+ argInt = (argInt >> 1);
+
+ switch ((primDescriptor->opcode)) {
+ case AddRR:
+ result = rcvrInt + argInt;
+ break;
+ case SubRR:
+ result = rcvrInt - argInt;
+ break;
+ case AndRR:
+ result = rcvrInt && argInt;
+ break;
+ case OrRR:
+ result = rcvrInt || argInt;
+ break;
+ default:
+ error("Case not found and no otherwise clause");
+ }
+ if (isIntegerValue(result)) {
+ annotateBytecode(gLabel());
+ return ssPop(2),ssPushConstant(((result << 1) | 1));
+ }
+ return genSpecialSelectorSend();
+ }
+ if (!(argIsInt
+ || (rcvrIsInt))) {
+ return genSpecialSelectorSend();
+ }
+ if (argIsInt) {
+ ssFlushTo(simStackPtr - 2);
+ popToReg(ssValue(1), ReceiverResultReg);
+ ssPop(2);
+ gMoveRR(ReceiverResultReg, TempReg);
+ }
+ else {
+ marshallSendArguments(1);
+ gMoveRR(Arg0Reg, TempReg);
+ if (!(rcvrIsInt)) {
+ if (isSmallIntegerTagNonZero()) {
+ gAndRR(ReceiverResultReg, TempReg);
+ }
+ else {
+ gOrRR(ReceiverResultReg, TempReg);
+ }
+ }
+ }
+ jumpNotSmallInts = genJumpNotSmallIntegerInScratchReg(TempReg);
+
+ switch ((primDescriptor->opcode)) {
+ case AddRR:
+ if (argIsInt) {
+ gAddCqR(argInt - ConstZero, ReceiverResultReg);
+
+ /* overflow; must undo the damage before continuing */
+
+ jumpContinue = gJumpNoOverflow(0);
+ gSubCqR(argInt - ConstZero, ReceiverResultReg);
+ }
+ else {
+ genRemoveSmallIntegerTagsInScratchReg(ReceiverResultReg);
+ gAddRR(Arg0Reg, ReceiverResultReg);
+
+ /* overflow; must undo the damage before continuing */
+
+ jumpContinue = gJumpNoOverflow(0);
+ if (rcvrIsInt) {
+ gMoveCqR(rcvrInt, ReceiverResultReg);
+ }
+ else {
+ gSubRR(Arg0Reg, ReceiverResultReg);
+ genSetSmallIntegerTagsIn(ReceiverResultReg);
+ }
+ }
+ break;
+ case SubRR:
+ if (argIsInt) {
+ gSubCqR(argInt - ConstZero, ReceiverResultReg);
+
+ /* overflow; must undo the damage before continuing */
+
+ jumpContinue = gJumpNoOverflow(0);
+ gAddCqR(argInt - ConstZero, ReceiverResultReg);
+ }
+ else {
+ genRemoveSmallIntegerTagsInScratchReg(Arg0Reg);
+ gSubRR(Arg0Reg, ReceiverResultReg);
+
+ /* overflow; must undo the damage before continuing */
+
+ jumpContinue = gJumpNoOverflow(0);
+ gAddRR(Arg0Reg, ReceiverResultReg);
+ genSetSmallIntegerTagsIn(Arg0Reg);
+ }
+ break;
+ case AndRR:
+ if (argIsInt) {
+ gAndCqR(argInt, ReceiverResultReg);
+ }
+ else {
+ gAndRR(Arg0Reg, ReceiverResultReg);
+ }
+ jumpContinue = gJump(0);
+ break;
+ case OrRR:
+ if (argIsInt) {
+ gOrCqR(argInt, ReceiverResultReg);
+ }
+ else {
+ gOrRR(Arg0Reg, ReceiverResultReg);
+ }
+ jumpContinue = gJump(0);
+ break;
+ default:
+ error("Case not found and no otherwise clause");
+ }
+ jmpTarget(jumpNotSmallInts, gLabel());
+ if (argIsInt) {
+ gMoveCqR(argInt, Arg0Reg);
+ }
+ genMarshalledSendnumArgs(specialSelector(byte0 - 176), 1);
+ jmpTarget(jumpContinue, gLabel());
+ return 0;
+}
+
+static sqInt
 genSpecialSelectorClass(void)
 {
- gMoveMwrR(0, SPReg, SendNumArgsReg);
+ ssPop(1);
+ ssAllocateRequiredRegand(SendNumArgsReg, ClassReg);
+ ssPush(1);
+ popToReg(ssTop(), SendNumArgsReg);
  genGetClassObjectOfintoscratchReg(SendNumArgsReg, ClassReg, TempReg);
- gMoveRMwr(ClassReg, 0, SPReg);
- return 0;
+ return ssPop(1),ssPushRegister(ClassReg);
 }
 
 static sqInt
+genSpecialSelectorComparison(void)
+{
+    sqInt argInt;
+    sqInt argIsInt;
+    sqInt branchBytecode;
+    BytecodeDescriptor *branchDescriptor;
+    sqInt branchPC;
+    sqInt inlineCAB;
+    AbstractInstruction *jumpNotSmallInts;
+    sqInt postBranchPC;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrInt;
+    sqInt rcvrIsInt;
+    sqInt result;
+    sqInt targetBytecodePC;
+
+ ssFlushTo(simStackPtr - 2);
+ primDescriptor = generatorAt(byte0);
+ argIsInt = (((ssTop()->type)) == SSConstant)
+ && ((((argInt = (ssTop()->constant))) & 1));
+ rcvrIsInt = (((ssValue(1)->type)) == SSConstant)
+ && ((((rcvrInt = (ssValue(1)->constant))) & 1));
+ if (argIsInt
+ && (rcvrIsInt)) {
+ ;
+
+ switch ((primDescriptor->opcode)) {
+ case JumpLess:
+ result = rcvrInt < argInt;
+ break;
+ case JumpLessOrEqual:
+ result = rcvrInt <= argInt;
+ break;
+ case JumpGreater:
+ result = rcvrInt > argInt;
+ break;
+ case JumpGreaterOrEqual:
+ result = rcvrInt >= argInt;
+ break;
+ case JumpZero:
+ result = rcvrInt == argInt;
+ break;
+ case JumpNonZero:
+ result = rcvrInt != argInt;
+ break;
+ default:
+ error("Case not found and no otherwise clause");
+ }
+ annotateBytecode(gLabel());
+ ssPop(2);
+ return ssPushConstant((result
+ ? trueObject()
+ : falseObject()));
+ }
+ branchPC = bytecodePointer + ((primDescriptor->numBytes));
+ branchBytecode = fetchByteofObject(branchPC, methodObj);
+
+ /* Only interested in inlining if followed by a conditional branch. */
+
+ branchDescriptor = generatorAt(branchBytecode);
+
+ /* Further, only interested in inlining = and ~= if there's a SmallInteger constant involved.
+ The relational operators successfully statically predict SmallIntegers; the equality operators do not. */
+
+ inlineCAB = ((branchDescriptor->isBranchTrue))
+ || ((branchDescriptor->isBranchFalse));
+ if (inlineCAB
+ && ((((primDescriptor->opcode)) == JumpZero)
+ || (((primDescriptor->opcode)) == JumpNonZero))) {
+ inlineCAB = argIsInt
+ || (rcvrIsInt);
+ }
+ if (!(inlineCAB)) {
+ return genSpecialSelectorSend();
+ }
+ targetBytecodePC = (branchPC + ((branchDescriptor->numBytes))) + (spanForatbyte0in(branchDescriptor, branchPC, branchBytecode, methodObj));
+ postBranchPC = branchPC + ((branchDescriptor->numBytes));
+ if (argIsInt) {
+ ssFlushTo(simStackPtr - 2);
+ popToReg(ssValue(1), ReceiverResultReg);
+ ssPop(2);
+ gMoveRR(ReceiverResultReg, TempReg);
+ }
+ else {
+ marshallSendArguments(1);
+ gMoveRR(Arg0Reg, TempReg);
+ if (!(rcvrIsInt)) {
+ if (isSmallIntegerTagNonZero()) {
+ gAndRR(ReceiverResultReg, TempReg);
+ }
+ else {
+ gOrRR(ReceiverResultReg, TempReg);
+ }
+ }
+ }
+ jumpNotSmallInts = genJumpNotSmallIntegerInScratchReg(TempReg);
+ if (argIsInt) {
+ gCmpCqR(argInt, ReceiverResultReg);
+ }
+ else {
+ gCmpRR(Arg0Reg, ReceiverResultReg);
+ }
+ genoperand(((branchDescriptor->isBranchTrue)
+ ? (primDescriptor->opcode)
+ : inverseBranchFor((primDescriptor->opcode))), ((usqInt)(ensureNonMergeFixupAt(targetBytecodePC - initialPC))));
+ gJump(ensureNonMergeFixupAt(postBranchPC - initialPC));
+ jmpTarget(jumpNotSmallInts, gLabel());
+ if (argIsInt) {
+ gMoveCqR(argInt, Arg0Reg);
+ }
+ return genMarshalledSendnumArgs(specialSelector(byte0 - 176), 1);
+}
+
+static sqInt
 genSpecialSelectorEqualsEquals(void)
 {
+    sqInt argReg;
+    sqInt branchBytecode;
+    BytecodeDescriptor *branchDescriptor;
+    AbstractInstruction *jumpEqual;
     AbstractInstruction *jumpNotEqual;
-    AbstractInstruction *jumpPush;
+    sqInt nextPC;
+    sqInt postBranchPC;
+    BytecodeDescriptor *primDescriptor;
+    sqInt rcvrReg;
+    sqInt resultReg;
+    sqInt targetBytecodePC;
 
- gPopR(TempReg);
- gMoveMwrR(0, SPReg, ClassReg);
- gCmpRR(TempReg, ClassReg);
- jumpNotEqual = gJumpNonZero(0);
- annotateobjRef(gMoveCwR(trueObject(), TempReg), trueObject());
- jumpPush = gJump(0);
- jmpTarget(jumpNotEqual, annotateobjRef(gMoveCwR(falseObject(), TempReg), falseObject()));
- jmpTarget(jumpPush, gMoveRMwr(TempReg, 0, SPReg));
+ ssPop(2);
+ resultReg = availableRegisterOrNil();
+ if (!(resultReg)) {
+ ssAllocateRequiredReg(resultReg = Arg1Reg);
+ }
+ ssPush(2);
+ if ((((ssTop()->type)) == SSConstant)
+ && (!((ssTop()->spilled)))) {
+ if (((ssValue(1)->type)) == SSRegister) {
+
+ /* if spilled we must generate a real pop */
+
+ rcvrReg = (ssValue(1)->registerr);
+ }
+ else {
+ popToReg(ssValue(1), rcvrReg = resultReg);
+ }
+ if (shouldAnnotateObjectReference((ssTop()->constant))) {
+ annotateobjRef(gCmpCwR((ssTop()->constant), rcvrReg), (ssTop()->constant));
+ }
+ else {
+ gCmpCqR((ssTop()->constant), rcvrReg);
+ }
+ ssPop(1);
+ }
+ else {
+ argReg = ssStorePoptoPreferredReg(1, TempReg);
+ rcvrReg = (argReg == resultReg
+ ? TempReg
+ : resultReg);
+ popToReg(ssTop(), rcvrReg);
+ gCmpRR(argReg, rcvrReg);
+ }
+ ssPop(1);
+ ssPushRegister(resultReg);
+ primDescriptor = generatorAt(byte0);
+ nextPC = bytecodePointer + ((primDescriptor->numBytes));
+ branchBytecode = fetchByteofObject(nextPC, methodObj);
+ branchDescriptor = generatorAt(branchBytecode);
+ if (((branchDescriptor->isBranchTrue))
+ || ((branchDescriptor->isBranchFalse))) {
+ ssFlushTo(simStackPtr - 1);
+ targetBytecodePC = (nextPC + ((branchDescriptor->numBytes))) + (spanForatbyte0in(branchDescriptor, nextPC, branchBytecode, methodObj));
+ postBranchPC = nextPC + ((branchDescriptor->numBytes));
+ if (((fixupAt(nextPC - initialPC)->targetInstruction)) == 0) {
+
+ /* The next instruction is dead.  we can skip it. */
+
+ deadCode = 1;
+ ssPop(1);
+ ensureFixupAt(targetBytecodePC - initialPC);
+ ensureFixupAt(postBranchPC - initialPC);
+ }
+ genoperand(((branchDescriptor->isBranchTrue)
+ ? JumpZero
+ : JumpNonZero), ((usqInt)(ensureNonMergeFixupAt(targetBytecodePC - initialPC))));
+ gJump(ensureNonMergeFixupAt(postBranchPC - initialPC));
+ }
+ else {
+ jumpNotEqual = gJumpNonZero(0);
+ annotateobjRef(gMoveCwR(trueObject(), resultReg), trueObject());
+ jumpEqual = gJump(0);
+ jmpTarget(jumpNotEqual, annotateobjRef(gMoveCwR(falseObject(), resultReg), falseObject()));
+ jmpTarget(jumpEqual, gLabel());
+ }
+ if (resultReg == ReceiverResultReg) {
+ (optStatus.isReceiverResultRegLive = 0);
+ }
  return 0;
 }
 
@@ -9618,6 +10460,12 @@
 }
 
 static sqInt
+genSSPushSlotreg(sqInt index, sqInt baseReg)
+{
+ return ssPushBaseoffset(baseReg, slotOffsetOfInstVarIndex(index));
+}
+
+static sqInt
 genStoreAndPopReceiverVariableBytecode(void)
 {
  return genStorePopReceiverVariable(1, byte0 & 7);
@@ -9645,23 +10493,57 @@
 }
 
 static sqInt
+genStoreImmediateInSourceRegslotIndexdestReg(sqInt sourceReg, sqInt index, sqInt destReg)
+{
+ gMoveRMwr(sourceReg, (index * BytesPerWord) + BaseHeaderSize, destReg);
+ return 0;
+}
+
+static sqInt
 genStorePopLiteralVariable(sqInt popBoolean, sqInt litVarIndex)
 {
     sqInt association;
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
 
+ flag("with better register allocation this wouldn't need a frame.  e.g. use SendNumArgs instead of ReceiverResultReg");
  assert(needsFrame);
  association = literalofMethod(litVarIndex, methodObj);
- annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
- if (popBoolean) {
- gPopR(ClassReg);
+ (optStatus.isReceiverResultRegLive = 0);
+ if ((((ssTop()->type)) == SSConstant)
+ && (isImmediate((ssTop()->constant)))) {
+ constVal = (ssTop()->constant);
+ if (popBoolean) {
+ ssPop(1);
+ }
+ ssAllocateRequiredReg(ReceiverResultReg);
+ annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
+ gMoveCqR(constVal, TempReg);
+ if (traceStores > 0) {
+ CallRT(ceTraceStoreTrampoline);
+ }
+ return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, ValueIndex, ReceiverResultReg);
  }
- else {
- gMoveMwrR(0, SPReg, ClassReg);
+ if ((((topReg = registerOrNil(ssTop()))) == null)
+ || (topReg == ReceiverResultReg)) {
+ topReg = ClassReg;
  }
+ ssPop(1);
+ ssAllocateRequiredReg(topReg);
+ ssPush(1);
+ flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+ valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+ if (valueReg == ReceiverResultReg) {
+ gMoveRR(valueReg, topReg);
+ }
+ ssAllocateCallReg(ReceiverResultReg);
+ annotateobjRef(gMoveCwR(association, ReceiverResultReg), association);
  if (traceStores > 0) {
+ gMoveRR(topReg, TempReg);
  CallRT(ceTraceStoreTrampoline);
  }
- return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, ValueIndex, ReceiverResultReg, TempReg);
+ return genStoreSourceRegslotIndexdestRegscratchReg(topReg, ValueIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
@@ -9671,21 +10553,28 @@
     AbstractInstruction *jmpSingle;
 
  assert(needsFrame);
- gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+ ssFlushUpThroughReceiverVariable(slotIndex);
+ ensureReceiverResultRegContainsSelf();
+ ssPop(1);
+ ssAllocateCallRegand(ClassReg, SendNumArgsReg);
+ ssPush(1);
  genLoadSlotsourceRegdestReg(SenderIndex, ReceiverResultReg, TempReg);
- gMoveMwrR(0, SPReg, ClassReg);
+ flag("why do we always pop??");
+ flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+ popToReg(ssTop(), ClassReg);
  jmpSingle = genJumpNotSmallIntegerInScratchReg(TempReg);
  gMoveCqR(slotIndex, SendNumArgsReg);
  CallRT(ceStoreContextInstVarTrampoline);
  jmpDone = gJump(0);
  jmpTarget(jmpSingle, gLabel());
  if (traceStores > 0) {
+ gMoveRR(ClassReg, TempReg);
  CallRT(ceTraceStoreTrampoline);
  }
  genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
  jmpTarget(jmpDone, gLabel());
  if (popBoolean) {
- gAddCqR(BytesPerWord, SPReg);
+ ssPop(1);
  }
  return 0;
 }
@@ -9693,48 +10582,100 @@
 static sqInt
 genStorePopReceiverVariable(sqInt popBoolean, sqInt slotIndex)
 {
- if (needsFrame) {
- gMoveMwrR(FoxMFReceiver, FPReg, ReceiverResultReg);
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
+
+ ssFlushUpThroughReceiverVariable(slotIndex);
+ if ((((ssTop()->type)) == SSConstant)
+ && (isImmediate((ssTop()->constant)))) {
+ constVal = (ssTop()->constant);
+ if (popBoolean) {
+ ssPop(1);
+ }
+ ensureReceiverResultRegContainsSelf();
+ gMoveCqR(constVal, TempReg);
+ if (traceStores > 0) {
+ CallRT(ceTraceStoreTrampoline);
+ }
+ return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, slotIndex, ReceiverResultReg);
  }
- if (popBoolean) {
- gPopR(ClassReg);
+ if ((((topReg = registerOrNil(ssTop()))) == null)
+ || (topReg == ReceiverResultReg)) {
+ topReg = ClassReg;
  }
- else {
- gMoveMwrR(0, SPReg, ClassReg);
+ ssPop(1);
+ ssAllocateCallReg(topReg);
+ ssPush(1);
+ flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+ valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+ if (valueReg == ReceiverResultReg) {
+ gMoveRR(valueReg, topReg);
  }
+ ensureReceiverResultRegContainsSelf();
  if (traceStores > 0) {
+ gMoveRR(topReg, TempReg);
  CallRT(ceTraceStoreTrampoline);
  }
- return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
+ return genStoreSourceRegslotIndexdestRegscratchReg(topReg, slotIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
 genStorePopRemoteTempAt(sqInt popBoolean, sqInt slotIndex, sqInt remoteTempIndex)
 {
+    sqInt constVal;
+    sqInt topReg;
+    sqInt valueReg;
+
  assert(needsFrame);
- if (popBoolean) {
- gPopR(ClassReg);
+ (optStatus.isReceiverResultRegLive = 0);
+ if ((((ssTop()->type)) == SSConstant)
+ && (isImmediate((ssTop()->constant)))) {
+ constVal = (ssTop()->constant);
+ if (popBoolean) {
+ ssPop(1);
+ }
+ ssAllocateRequiredReg(ReceiverResultReg);
+ gMoveMwrR(frameOffsetOfTemporary(remoteTempIndex), FPReg, ReceiverResultReg);
+ gMoveCqR(constVal, TempReg);
+ if (traceStores > 0) {
+ CallRT(ceTraceStoreTrampoline);
+ }
+ return genStoreImmediateInSourceRegslotIndexdestReg(TempReg, slotIndex, ReceiverResultReg);
  }
- else {
- gMoveMwrR(0, SPReg, ClassReg);
+ if ((((topReg = registerOrNil(ssTop()))) == null)
+ || (topReg == ReceiverResultReg)) {
+ topReg = ClassReg;
  }
+ ssPop(1);
+ ssAllocateRequiredReg(topReg);
+ ssPush(1);
+ flag("but what if we don't pop?  The top reg is still potentially trashed in the call;. think this through");
+ valueReg = ssStorePoptoPreferredReg(popBoolean, topReg);
+ if (valueReg == ReceiverResultReg) {
+ gMoveRR(valueReg, topReg);
+ }
+ if (!(popBoolean)) {
+ ssPop(1);
+ ssPushRegister(topReg);
+ }
+ ssAllocateCallReg(ReceiverResultReg);
  gMoveMwrR(frameOffsetOfTemporary(remoteTempIndex), FPReg, ReceiverResultReg);
  if (traceStores > 0) {
+ gMoveRR(topReg, TempReg);
  CallRT(ceTraceStoreTrampoline);
  }
- return genStoreSourceRegslotIndexdestRegscratchReg(ClassReg, slotIndex, ReceiverResultReg, TempReg);
+ return genStoreSourceRegslotIndexdestRegscratchReg(topReg, slotIndex, ReceiverResultReg, TempReg);
 }
 
 static sqInt
 genStorePopTemporaryVariable(sqInt popBoolean, sqInt tempIndex)
 {
- if (popBoolean) {
- gPopR(TempReg);
- }
- else {
- gMoveMwrR(0, SPReg, TempReg);
- }
- gMoveRMwr(TempReg, frameOffsetOfTemporary(tempIndex), FPReg);
+    sqInt reg;
+
+ ssFlushUpThroughTemporaryVariable(tempIndex);
+ reg = ssStorePoptoPreferredReg(popBoolean, TempReg);
+ gMoveRMwr(reg, frameOffsetOfTemporary(tempIndex), FPReg);
  return 0;
 }
 
@@ -9827,28 +10768,6 @@
 }
 
 
-/* Generate a trampoline with two arguments.
- Hack: a negative value indicates an abstract register, a non-negative
- value indicates a constant. */
-
-static sqInt
-genTrampolineForcalledargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1)
-{
- return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 2, regOrConst0, regOrConst1, null, null, 0, null, 0);
-}
-
-
-/* Generate a trampoline with four arguments.
- Hack: a negative value indicates an abstract register, a non-negative
- value indicates a constant. */
-
-static sqInt
-genTrampolineForcalledargargargarg(void *aRoutine, char *aString, sqInt regOrConst0, sqInt regOrConst1, sqInt regOrConst2, sqInt regOrConst3)
-{
- return genTrampolineForcalledcallJumpBarnumArgsargargargargsaveRegsresultRegappendOpcodes(aRoutine, aString, 1, 4, regOrConst0, regOrConst1, regOrConst2, regOrConst3, 0, null, 0);
-}
-
-
 /* Generate a trampoline with two arguments that answers a result.
  Hack: a negative value indicates an abstract register, a non-negative
  value indicates a constant. */
@@ -9918,7 +10837,6 @@
 static sqInt
 genUpArrowReturn(void)
 {
- flag("currently caller pushes result");
  if (inBlock) {
  assert(needsFrame);
  annotateBytecode(CallRT(ceNonLocalReturnTrampoline));
@@ -9927,8 +10845,14 @@
  if (needsFrame) {
  gMoveRR(FPReg, SPReg);
  gPopR(FPReg);
+ gRetN((methodOrBlockNumArgs + 1) * BytesPerWord);
  }
- gRetN((methodOrBlockNumArgs + 1) * BytesPerWord);
+ else {
+ gRetN(((methodOrBlockNumArgs > (numRegArgs()))
+ || (regArgsHaveBeenPushed)
+ ? (methodOrBlockNumArgs + 1) * BytesPerWord
+ : 0));
+ }
  return 0;
 }
 
@@ -10154,6 +11078,7 @@
  (methodLabel->opcode = Label);
  ((methodLabel->operands))[0] = 0;
  ((methodLabel->operands))[1] = 0;
+ callerSavedRegMask = callerSavedRegisterMask(backEnd);
 }
 
 void
@@ -10178,6 +11103,9 @@
 
 /* Make sure there's a flagged fixup at the targetIndex (pc relative to first
  pc) in fixups.
+ These are the targets of backward branches. A backward branch fixup's
+ simStackPtr needs to be set when generating the code for the bytecode at
+ the targetIndex.
  Initially a fixup's target is just a flag. Later on it is replaced with a
  proper instruction. */
 
@@ -10187,7 +11115,8 @@
     BytecodeFixup *fixup;
 
  fixup = fixupAt(targetIndex);
- (fixup->targetInstruction = ((AbstractInstruction *) 1));
+ (fixup->targetInstruction = ((AbstractInstruction *) 2));
+ (fixup->simStackPtr = -2);
  return fixup;
 }
 
@@ -10218,7 +11147,73 @@
  return 3;
 }
 
+static void
+initSimStackForFramefulMethod(sqInt startpc) /* Initialise simSelf and the simulated stack for a method that has a frame: receiver, args and temps are all described as already-spilled FPReg-relative slots. */
+{
+    CogSimStackEntry *desc;
+    sqInt i;
 
+ (optStatus.isReceiverResultRegLive = 0);
+ (simSelf.type = SSBaseOffset);
+ (simSelf.registerr = FPReg);
+ (simSelf.offset = FoxMFReceiver);
+ (simSelf.spilled = 1);
+
+ /* N.B. methodOrBlockNumTemps includes the argument count, so the spill base sits just above all args and temps. */
+
+ simSpillBase = methodOrBlockNumTemps;
+
+ /* The stack initially holds exactly the args and temps; the args are described first. */
+
+ simStackPtr = simSpillBase - 1;
+ for (i = 0; i <= (methodOrBlockNumArgs - 1); i += 1) { /* args: offsets are computed upwards from FoxCallerSavedIP */
+ desc = simStackAt(i);
+ (desc->type = SSBaseOffset);
+ (desc->registerr = FPReg);
+ (desc->offset = FoxCallerSavedIP + ((methodOrBlockNumArgs - i) * BytesPerWord));
+ (desc->spilled = 1);
+ (desc->bcptr = startpc);
+ }
+ for (i = methodOrBlockNumArgs; i <= simStackPtr; i += 1) { /* remaining temps: offsets are computed downwards from FoxMFReceiver */
+ desc = simStackAt(i);
+ (desc->type = SSBaseOffset);
+ (desc->registerr = FPReg);
+ (desc->offset = FoxMFReceiver - (((i - methodOrBlockNumArgs) + 1) * BytesPerWord));
+ (desc->spilled = 1);
+ (desc->bcptr = startpc);
+ }
+}
+
+static void
+initSimStackForFramelessMethod(sqInt startpc) /* Initialise simSelf and the simulated stack for a frameless method: the receiver lives in ReceiverResultReg and up to two args live in Arg0Reg/Arg1Reg. */
+{
+    CogSimStackEntry *desc;
+
+ (simSelf.type = SSRegister);
+ (simSelf.registerr = ReceiverResultReg);
+ (simSelf.spilled = 0);
+ (optStatus.isReceiverResultRegLive = 1);
+ (optStatus.ssEntry = (&simSelf));
+ assert(methodOrBlockNumTemps == methodOrBlockNumArgs); /* i.e. no temps beyond the args */
+ simStackPtr = simSpillBase = -1; /* NOTE(review): entries 0 and 1 are filled in below while simStackPtr stays -1 -- confirm this is intended */
+ assert((numRegArgs()) <= 2); /* only Arg0Reg and Arg1Reg are handled below */
+ if (((methodOrBlockNumArgs >= 1) && (methodOrBlockNumArgs <= (numRegArgs())))) { /* args are described as registers only when they all fit in registers */
+ desc = simStackAt(0);
+ (desc->type = SSRegister);
+ (desc->registerr = Arg0Reg);
+ (desc->spilled = 0);
+ (desc->bcptr = startpc);
+ if (methodOrBlockNumArgs > 1) {
+ desc = simStackAt(1);
+ (desc->type = SSRegister);
+ (desc->registerr = Arg1Reg);
+ (desc->spilled = 0);
+ (desc->bcptr = startpc);
+ }
+ }
+}
+
+
 /* Answer the inline cache tag for the return address of a send. */
 
 static sqInt
@@ -10311,6 +11306,72 @@
 }
 
 static sqInt
+inverseBranchFor(sqInt opcode) /* Answer the conditional-jump opcode that tests the opposite condition of opcode; calls error() for any opcode not listed. */
+{
+
+ switch (opcode) {
+ case JumpLongZero:
+ return JumpLongNonZero;
+
+ case JumpLongNonZero:
+ return JumpLongZero;
+
+ case JumpZero:
+ return JumpNonZero;
+
+ case JumpNonZero:
+ return JumpZero;
+
+ case JumpNegative:
+ return JumpNonNegative;
+
+ case JumpNonNegative:
+ return JumpNegative;
+
+ case JumpOverflow:
+ return JumpNoOverflow;
+
+ case JumpNoOverflow:
+ return JumpOverflow;
+
+ case JumpCarry:
+ return JumpNoCarry;
+
+ case JumpNoCarry:
+ return JumpCarry;
+
+ case JumpLess:
+ return JumpGreaterOrEqual;
+
+ case JumpGreaterOrEqual:
+ return JumpLess;
+
+ case JumpGreater:
+ return JumpLessOrEqual;
+
+ case JumpLessOrEqual:
+ return JumpGreater;
+
+ case JumpBelow:
+ return JumpAboveOrEqual;
+
+ case JumpAboveOrEqual:
+ return JumpBelow;
+
+ case JumpAbove:
+ return JumpBelowOrEqual;
+
+ case JumpBelowOrEqual:
+ return JumpAbove;
+
+ default:
+ error("Case not found and no otherwise clause");
+ }
+ error("invalid opcode for inverse"); /* only reached through the default case above, and only if error() returns */
+ return 0;
+}
+
+static sqInt
 isAFixup(AbstractInstruction * self_in_isAFixup, void *fixupOrAddress)
 {
  return addressIsInFixups(fixupOrAddress);
@@ -10396,6 +11457,12 @@
  || (((target >= methodZoneBase) && (target <= (zoneLimit()))));
 }
 
+static sqInt
+isSmallIntegerTagNonZero(void) /* Always answers 1 (true). */
+{
+ return 1;
+}
+
 static AbstractInstruction *
 gJumpAboveOrEqual(void *jumpTarget)
 {
@@ -10503,6 +11570,12 @@
 }
 
 static AbstractInstruction *
+gJumpNoOverflow(void *jumpTarget) /* Generate a JumpNoOverflow abstract instruction whose operand is jumpTarget; the counterpart of gJumpOverflow. */
+{
+ return genoperand(JumpNoOverflow, ((sqInt)jumpTarget));
+}
+
+static AbstractInstruction *
 gJumpOverflow(void *jumpTarget)
 {
  return genoperand(JumpOverflow, ((sqInt)jumpTarget));
@@ -10728,7 +11801,20 @@
  return ((((byteAt(followingAddress - 1)) << 24) + ((byteAt(followingAddress - 2)) << 16)) + ((byteAt(followingAddress - 3)) << 8)) + (byteAt(followingAddress - 4));
 }
 
+static sqInt
+liveRegisters(void) /* Answer the union of the register masks of the simulated-stack entries from the spill base (clamped to 0) through simStackPtr. */
+{
+    sqInt i;
+    sqInt regsSet;
 
+ regsSet = 0;
+ for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= simStackPtr; i += 1) { /* entries below simSpillBase are not examined */
+ regsSet = regsSet | (registerMask(simStackAt(i)));
+ }
+ return regsSet;
+}
+
+
 /* Answer the byte size of a MoveCwR opcode's corresponding machine code */
 
 static sqInt
@@ -11430,6 +12516,45 @@
  return 0;
 }
 
+
+/* Spill everything on the simulated stack that needs spilling (that below
+ receiver and arguments).
+ Marshall receiver and arguments to stack and/or registers depending on arg
+ count. If the args don't fit in registers push receiver and args (spill
+ everything), but still assign
+ the receiver to ReceiverResultReg. */
+
+static void
+marshallSendArguments(sqInt numArgs)
+{
+ if (numArgs > (numRegArgs())) { /* too many args for registers: spill the whole simulated stack */
+ ssFlushTo(simStackPtr);
+ storeToReg(simStackAt(simStackPtr - numArgs), ReceiverResultReg); /* storeToReg loads without popping, so the receiver stays on the stack as well */
+ }
+ else {
+ ssFlushTo((simStackPtr - numArgs) - 1); /* spill only what lies below the receiver */
+ if (numArgs > 0) { /* first make sure nothing beneath each argument still occupies the register that argument is about to be loaded into */
+ if (((numRegArgs()) > 1)
+ && (numArgs > 1)) {
+ ssAllocateRequiredRegupThrough(Arg0Reg, simStackPtr - 2);
+ ssAllocateRequiredRegupThrough(Arg1Reg, simStackPtr - 1);
+ }
+ else {
+ ssAllocateRequiredRegupThrough(Arg0Reg, simStackPtr - 1);
+ }
+ }
+ if (((numRegArgs()) > 1)
+ && (numArgs > 1)) {
+ popToReg(simStackAt(simStackPtr), Arg1Reg); /* topmost entry first: last argument */
+ }
+ if (numArgs > 0) {
+ popToReg(simStackAt((simStackPtr - numArgs) + 1), Arg0Reg); /* first argument */
+ }
+ popToReg(simStackAt(simStackPtr - numArgs), ReceiverResultReg); /* receiver last */
+ }
+ ssPop(numArgs + 1); /* in both cases the simulated stack loses the receiver and all arguments */
+}
+
 usqInt
 maxCogMethodAddress(void)
 {
@@ -11509,10 +12634,64 @@
  : absPC);
 }
 
+
+/* Discard type information because of a control-flow merge. */
+
+static void
+mergeAtfrom(CogSimStackEntry * self_in_mergeAtfrom, sqInt baseOffset, sqInt baseRegister) /* Turn an already-spilled entry into a plain SSSpill entry located at baseOffset from baseRegister. */
+{
+ assert((self_in_mergeAtfrom->spilled)); /* callers must have flushed the entry first */
+ if (((self_in_mergeAtfrom->type)) == SSSpill) { /* already a spill entry: only check that it describes the expected location */
+ assert((((self_in_mergeAtfrom->offset)) == baseOffset)
+ && (((self_in_mergeAtfrom->registerr)) == baseRegister));
+ }
+ else {
+ (self_in_mergeAtfrom->type) = SSSpill;
+ (self_in_mergeAtfrom->offset) = baseOffset;
+ (self_in_mergeAtfrom->registerr) = baseRegister;
+ }
+}
+
+
+/* Merge control flow at a fixup. The fixup holds the simStackPtr at the jump
+ to this target.
+ See stackToRegisterMapping on the class side for a full description. */
+
+static void
+mergeafterReturn(BytecodeFixup *fixup, sqInt mergeFollowsReturn)
+{
+    sqInt i;
+
+ traceMerge(fixup);
+ (optStatus.isReceiverResultRegLive = 0);
+ if (mergeFollowsReturn) { /* adopt the stack depth recorded in the fixup instead of the current one */
+ assert((((usqInt)((fixup->targetInstruction)))) >= 2);
+ simStackPtr = (fixup->simStackPtr);
+ }
+ if ((((usqInt)((fixup->targetInstruction)))) <= 2) { /* presumably still a flag value rather than a real instruction (backward-branch fixups are flagged with 2) */
+ ssFlushTo(simStackPtr);
+ if (((fixup->simStackPtr)) <= -2) { /* -2 is what backward-branch fixups are initialised with; record the depth now */
+ (fixup->simStackPtr = simStackPtr);
+ }
+ (fixup->targetInstruction = gLabel());
+ }
+ assert(simStackPtr >= ((fixup->simStackPtr)));
+ ;
+ simStackPtr = (fixup->simStackPtr);
+
+ /* For now throw away all type information for values on the stack, but sometime consider
+ the more sophisticated merge described in the class side stackToRegisterMapping. */
+
+ simSpillBase = methodOrBlockNumTemps;
+ for (i = methodOrBlockNumTemps; i <= simStackPtr; i += 1) { /* every entry above the temps becomes a spill at its FPReg-relative slot */
+ mergeAtfrom(simStackAt(i), FoxMFReceiver - (((i - methodOrBlockNumArgs) + 1) * BytesPerOop), FPReg);
+ }
+}
+
 static sqInt
 methodAbortTrampolineFor(sqInt numArgs)
 {
- return ceMethodAbortTrampoline;
+ return methodAbortTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))]; /* index is min(numArgs, numRegArgs() + 1) */
 }
 
 static CogMethod *
@@ -11567,7 +12746,13 @@
  return genoperand(NegateR, reg);
 }
 
+static AbstractInstruction *
+gNop(void) /* Generate a Nop abstract instruction and answer what gen() answers. */
+{
+ return gen(Nop);
+}
 
+
 /* Compute the distance to the logically subsequent bytecode, i.e. skip over
  blocks.
  */
@@ -11894,7 +13079,7 @@
 static sqInt
 picAbortTrampolineFor(sqInt numArgs)
 {
- return cePICAbortTrampoline;
+ return picAbortTrampolines[((numArgs < ((numRegArgs()) + 1)) ? numArgs : ((numRegArgs()) + 1))]; /* index is min(numArgs, numRegArgs() + 1) */
 }
 
 
@@ -11922,7 +13107,37 @@
  }
 }
 
+static void
+popToReg(CogSimStackEntry * self_in_popToReg, sqInt reg) /* Generate code that gets the entry's value into reg: a pop if the entry was spilled, otherwise a load chosen by the entry's type. */
+{
+ if ((self_in_popToReg->spilled)) { /* spilled entries are handled with a pop whatever their type */
+ gPopR(reg);
+ return;
+ }
+
+ switch ((self_in_popToReg->type)) {
+ case SSBaseOffset: /* load from offset relative to the entry's base register */
+ gMoveMwrR((self_in_popToReg->offset), (self_in_popToReg->registerr), reg);
+ break;
+ case SSConstant: /* constants needing annotation use gMoveCwR and are annotated, the rest use gMoveCqR */
+ if (shouldAnnotateObjectReference((self_in_popToReg->constant))) {
+ annotateobjRef(gMoveCwR((self_in_popToReg->constant), reg), (self_in_popToReg->constant));
+ }
+ else {
+ gMoveCqR((self_in_popToReg->constant), reg);
+ }
+ break;
+ case SSRegister: /* nothing to generate when the value is already in reg */
+ if (reg != ((self_in_popToReg->registerr))) {
+ gMoveRR((self_in_popToReg->registerr), reg);
+ }
+ break;
+ default: /* includes an unspilled SSSpill entry */
+ error("Case not found and no otherwise clause");
+ }
+}
 
+
 /* If there is a generator for the current primitive then answer it;
  otherwise answer nil. */
 
@@ -12110,15 +13325,45 @@
 }
 
 
-/* Dummy implementation for CogFooCompiler>callerSavedRegisterMask
- which doesn't get pruned due to Slang limitations. */
+/* Answer a bit mask for the receiver's register, if any. */
 
 static sqInt
+registerMask(CogSimStackEntry * self_in_registerMask) /* Only SSBaseOffset and SSRegister entries contribute a register; every other type answers 0. */
+{
+ return ((((self_in_registerMask->type)) == SSBaseOffset)
+ || (((self_in_registerMask->type)) == SSRegister)
+ ? registerMaskFor((self_in_registerMask->registerr))
+ : 0);
+}
+
+
+/* Answer a bit mask identifying the symbolic register.
+ Registers are negative numbers. */
+
+static sqInt
+registerMaskFor(sqInt reg) /* The mask is 1 shifted left by (1 - reg); a negative shift count is turned into a right shift. */
+{
+ return (((1 - reg) < 0) ? ((usqInt) 1 >> -(1 - reg)) : ((usqInt) 1 << (1 - reg)));
+}
+
+
+/* Answer a bit mask identifying the symbolic registers.
+ Registers are negative numbers. */
+
+static sqInt
 registerMaskForandand(sqInt reg1, sqInt reg2, sqInt reg3)
 {
- return 0;
+ return (((((1 - reg1) < 0) ? ((usqInt) 1 >> -(1 - reg1)) : ((usqInt) 1 << (1 - reg1)))) | ((((1 - reg2) < 0) ? ((usqInt) 1 >> -(1 - reg2)) : ((usqInt) 1 << (1 - reg2))))) | ((((1 - reg3) < 0) ? ((usqInt) 1 >> -(1 - reg3)) : ((usqInt) 1 << (1 - reg3)))); /* the OR of three inlined registerMaskFor computations */
 }
 
+static sqInt
+registerOrNil(CogSimStackEntry * self_in_registerOrNil) /* Answer the entry's register if it is an SSRegister entry, otherwise 0 standing in for nil. */
+{
+ return (((self_in_registerOrNil->type)) == SSRegister
+ ? (self_in_registerOrNil->registerr)
+ : 0);
+}
+
 static void
 relocateAndPruneYoungReferrers(void)
 {
@@ -12411,12 +13656,21 @@
 }
 
 
-/* See the subclass for explanation. */
+/* We must ensure the ReceiverResultReg is live across the store check so
+ that we can store into receiver inst vars in a frameless method since self
+ exists only in ReceiverResultReg in a frameless method. So if
+ ReceiverResultReg is
+ caller-saved we use the fact that ceStoreCheck: answers its argument to
+ reload ReceiverResultReg cheaply. Otherwise we don't care about the result
+ and use the cResultRegister, effectively a no-op (see
+ compileTrampoline...)  */
 
 static sqInt
 returnRegForStoreCheck(void)
 {
- return cResultRegister(backEnd);
+ return ((registerMaskFor(ReceiverResultReg)) & callerSavedRegMask /* non-zero when ReceiverResultReg is in the caller-saved set */
+ ? ReceiverResultReg
+ : cResultRegister(backEnd));
 }
 
 
@@ -12562,6 +13816,7 @@
     BytecodeDescriptor *descriptor;
     sqInt end;
     sqInt pc;
+    sqInt pushingNils;
     sqInt stackDelta;
 
  needsFrame = 0;
@@ -12569,6 +13824,8 @@
  pc = (blockStart->startpc);
  end = ((blockStart->startpc)) + ((blockStart->span));
  stackDelta = 0;
+ pushingNils = 1;
+ (blockStart->numInitialNils = 0);
  while (pc < end) {
  byte0 = fetchByteofObject(pc, methodObj);
  descriptor = generatorAt(byte0);
@@ -12580,12 +13837,20 @@
  stackDelta += (descriptor->stackDelta);
  }
  }
+ if (pushingNils) {
+ if ((pushingNils = (((descriptor->generator)) == (genPushConstantNilBytecode))
+ && (((fixupAt(pc - initialPC)->targetInstruction)) == 0))) {
+ assert(((descriptor->numBytes)) == 1);
+ (blockStart->numInitialNils = ((blockStart->numInitialNils)) + 1);
+ }
+ }
  pc = nextBytecodePCForatbyte0in(descriptor, pc, byte0, methodObj);
  }
  if (!(needsFrame)) {
  if (stackDelta < 0) {
  error("negative stack delta in block; block contains bogus code or internal error");
  }
+ (blockStart->numInitialNils = 0);
  while (stackDelta > 0) {
  descriptor = generatorAt(fetchByteofObject((blockStart->startpc), methodObj));
  if (((descriptor->generator)) != (genPushConstantNilBytecode)) {
@@ -12908,7 +14173,289 @@
  }
 }
 
+static void
+ssAllocateCallReg(sqInt requiredReg1) /* Free every caller-saved register plus requiredReg1, considering the whole simulated stack. */
+{
+ ssAllocateRequiredRegMaskupThrough(callerSavedRegMask | (registerMaskFor(requiredReg1)), simStackPtr);
+}
+
+static void
+ssAllocateCallRegand(sqInt requiredReg1, sqInt requiredReg2) /* Free every caller-saved register plus requiredReg1 and requiredReg2, considering the whole simulated stack. */
+{
+ ssAllocateRequiredRegMaskupThrough(callerSavedRegMask | ((registerMaskFor(requiredReg1)) | (registerMaskFor(requiredReg2))), simStackPtr);
+}
+
 static sqInt
+ssAllocatePreferredReg(sqInt preferredReg) /* Answer a register free for use: preferredReg if unused, else any unused one in GPRegMin..GPRegMax, else preferredReg after spilling its users. */
+{
+    sqInt i;
+    sqInt lastPreferred;
+    sqInt liveRegs;
+    sqInt preferredMask;
+    sqInt reg;
+
+
+ /* compute live regs while noting the last occurrence of preferredReg.
+ If there are none free we must spill from simSpillBase to last occurrence. */
+
+ lastPreferred = -1;
+ preferredMask = registerMaskFor(preferredReg);
+ liveRegs = registerMaskForandand(TempReg, FPReg, SPReg); /* these three are always treated as in use */
+ for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= simStackPtr; i += 1) {
+ liveRegs = liveRegs | (registerMask(simStackAt(i)));
+ if ((liveRegs & preferredMask) != 0) { /* NOTE(review): tests the accumulated mask, so once preferredReg is seen every later index also updates lastPreferred */
+ lastPreferred = i;
+ }
+ }
+ if ((liveRegs & (registerMaskFor(preferredReg))) == 0) {
+ return preferredReg;
+ }
+ for (reg = GPRegMin; reg <= GPRegMax; reg += 1) { /* otherwise take the first register nobody is using */
+ if ((liveRegs & (registerMaskFor(reg))) == 0) {
+ return reg;
+ }
+ }
+ ssFlushTo(lastPreferred); /* nothing free: spill far enough that preferredReg is no longer referenced */
+ assert(((liveRegisters()) & preferredMask) == 0);
+ return preferredReg;
+}
+
+static void
+ssAllocateRequiredRegMaskupThrough(sqInt requiredRegsMask, sqInt stackPtr) /* Spill simulated-stack entries up through stackPtr as needed so that no register in requiredRegsMask remains in use. */
+{
+    sqInt i;
+    sqInt lastRequired;
+    sqInt liveRegs;
+
+
+ /* compute live regs while noting the last occurrence of required regs.
+ If these are not free we must spill from simSpillBase to last occurrence.
+ Note we are conservative here; we could allocate FPReg in frameless methods. */
+
+ lastRequired = -1;
+ liveRegs = registerMaskForandand(TempReg, FPReg, SPReg); /* these three are always treated as in use */
+ for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= stackPtr; i += 1) {
+ liveRegs = liveRegs | (registerMask(simStackAt(i)));
+ if ((liveRegs & requiredRegsMask) != 0) { /* NOTE(review): tests the accumulated mask, so lastRequired keeps advancing after the first hit */
+ lastRequired = i;
+ }
+ }
+ if (!((liveRegs & requiredRegsMask) == 0)) {
+ ssFlushTo(lastRequired);
+ assert(((liveRegisters()) & requiredRegsMask) == 0);
+ }
+}
+
+static void
+ssAllocateRequiredReg(sqInt requiredReg) /* Free requiredReg, considering the whole simulated stack. */
+{
+ ssAllocateRequiredRegMaskupThrough(registerMaskFor(requiredReg), simStackPtr);
+}
+
+static void
+ssAllocateRequiredRegand(sqInt requiredReg1, sqInt requiredReg2) /* Free both requiredReg1 and requiredReg2, considering the whole simulated stack. */
+{
+ ssAllocateRequiredRegMaskupThrough((registerMaskFor(requiredReg1)) | (registerMaskFor(requiredReg2)), simStackPtr);
+}
+
+static void
+ssAllocateRequiredRegupThrough(sqInt requiredReg, sqInt stackPtr) /* Free requiredReg, considering only simulated-stack entries up through stackPtr. */
+{
+ ssAllocateRequiredRegMaskupThrough(registerMaskFor(requiredReg), stackPtr);
+}
+
+static void
+ssFlushTo(sqInt index) /* Ensure every simulated-stack entry up through index is spilled, then move simSpillBase just past index. */
+{
+    sqInt i;
+
+ for (i = methodOrBlockNumTemps; i <= (simSpillBase - 1); i += 1) { /* sanity check: everything between the temps and the spill base must already be spilled */
+ assert((simStackAt(i)->spilled));
+ }
+ if (simSpillBase <= index) { /* nothing to do when index lies below the spill base */
+ for (i = (((simSpillBase < 0) ? 0 : simSpillBase)); i <= index; i += 1) {
+ assert(needsFrame); /* each entry is spilled to an FPReg-relative slot, so a frame is required */
+ ensureSpilledAtfrom(simStackAt(i), frameOffsetOfTemporary(i), FPReg);
+ }
+ simSpillBase = index + 1;
+ }
+}
+
+
+/* Any occurrence on the simulated stack of the receiver variable being stored into
+ must be flushed, and with it every entry that lies below it on the stack. */
+
+static void
+ssFlushUpThroughReceiverVariable(sqInt slotIndex)
+{
+    CogSimStackEntry *desc;
+    sqInt index;
+
+ for (index = simStackPtr; index >= (((simSpillBase < 0) ? 0 : simSpillBase)); index += -1) { /* scan from the top down so the topmost reference is found first */
+ desc = simStackAt(index);
+ if ((((desc->type)) == SSBaseOffset)
+ && ((((desc->registerr)) == ReceiverResultReg)
+ && (((desc->offset)) == (slotOffsetOfInstVarIndex(slotIndex))))) {
+ ssFlushTo(index); /* spills this entry and everything beneath it */
+ return;
+ }
+ }
+}
+
+
+/* Before a store to temporary tempIndex, spill the topmost simulated-stack entry that
+ still refers to that temp's frame slot, together with every entry below it. */
+
+static void
+ssFlushUpThroughTemporaryVariable(sqInt tempIndex)
+{
+    CogSimStackEntry *desc;
+    sqInt index;
+
+ for (index = simStackPtr; index >= (((simSpillBase < 0) ? 0 : simSpillBase)); index += -1) { /* clamp as ssFlushUpThroughReceiverVariable does: simSpillBase is -1 in frameless methods and index -1 is outside simStack */
+ desc = simStackAt(index);
+ if ((((desc->type)) == SSBaseOffset)
+ && ((((desc->registerr)) == FPReg)
+ && (((desc->offset)) == (frameOffsetOfTemporary(tempIndex))))) {
+ ssFlushTo(index); /* spills this entry and everything beneath it */
+ return;
+ }
+ }
+}
+
+static void
+ssPop(sqInt n) /* Drop n entries from the simulated stack; generates no code. */
+{
+ assert(((simStackPtr - n) >= (methodOrBlockNumTemps - 1)) /* must not pop into the temps ... */
+ || ((!needsFrame)
+ && ((simStackPtr - n) >= -1))); /* ... except that a frameless method may pop down to empty (-1) */
+ simStackPtr -= n;
+}
+
+static sqInt
+ssPushBaseoffset(sqInt reg, sqInt offset) /* Push an unspilled SSBaseOffset entry (value at offset from reg) on the simulated stack; always answers 0. */
+{
+    CogSimStackEntry * cascade0;
+
+ ssPush(1);
+ if (simSpillBase > simStackPtr) { /* the new entry is unspilled, so the spill base may not lie above it */
+ simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+ }
+ cascade0 = ssTop();
+ (cascade0->type = SSBaseOffset);
+ (cascade0->registerr = reg);
+ (cascade0->offset = offset);
+ (cascade0->spilled = 0);
+ (cascade0->bcptr = bytecodePointer);
+ return 0;
+}
+
+static sqInt
+ssPushConstant(sqInt literal) /* Push an unspilled SSConstant entry holding literal on the simulated stack; always answers 0. */
+{
+    CogSimStackEntry * cascade0;
+
+ ssPush(1);
+ if (simSpillBase > simStackPtr) { /* the new entry is unspilled, so the spill base may not lie above it */
+ simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+ }
+ cascade0 = ssTop();
+ (cascade0->type = SSConstant);
+ (cascade0->constant = literal);
+ (cascade0->spilled = 0);
+ (cascade0->bcptr = bytecodePointer);
+ return 0;
+}
+
+static sqInt
+ssPushDesc(CogSimStackEntry simStackEntry) /* Push a copy of simStackEntry (passed by value) on the simulated stack, marked unspilled; always answers 0. */
+{
+ if (((simStackEntry.type)) == SSSpill) { /* a copy of a spill entry is pushed as a base+offset reference to the same location */
+ (simStackEntry.type = SSBaseOffset);
+ }
+ (simStackEntry.spilled = 0);
+ (simStackEntry.bcptr = bytecodePointer);
+ simStack[(simStackPtr += 1)] = simStackEntry;
+ if (simSpillBase > simStackPtr) { /* the new entry is unspilled, so the spill base may not lie above it */
+ simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+ }
+ return 0;
+}
+
+static sqInt
+ssPushRegister(sqInt reg) /* Push an unspilled SSRegister entry for reg on the simulated stack; always answers 0. */
+{
+    CogSimStackEntry * cascade0;
+
+ ssPush(1);
+ if (simSpillBase > simStackPtr) { /* the new entry is unspilled, so the spill base may not lie above it */
+ simSpillBase = ((simStackPtr < 0) ? 0 : simStackPtr);
+ }
+ cascade0 = ssTop();
+ (cascade0->type = SSRegister);
+ (cascade0->registerr = reg);
+ (cascade0->spilled = 0);
+ (cascade0->bcptr = bytecodePointer);
+ return 0;
+}
+
+static void
+ssPush(sqInt n) /* Grow the simulated stack by n entries; only simStackPtr moves, the new entries are not initialised here. */
+{
+ simStackPtr += n;
+}
+
+
+/* Store or pop the top simulated stack entry to a register.
+ Pop to preferredReg if the entry is not itself a register.
+ Answer the actual register the result ends up in. */
+
+static sqInt
+ssStorePoptoPreferredReg(sqInt popBoolean, sqInt preferredReg)
+{
+    sqInt actualReg;
+
+ actualReg = preferredReg;
+ if (popBoolean) {
+ if ((((ssTop()->type)) == SSRegister)
+ && (!((ssTop()->spilled)))) { /* value already sits in a register and was never pushed: no code needed */
+ actualReg = (ssTop()->registerr);
+ }
+ else {
+ popToReg(ssTop(), preferredReg);
+ }
+ ssPop(1); /* popping variant: the entry leaves the simulated stack */
+ }
+ else {
+ if (((ssTop()->type)) == SSRegister) { /* unlike the popping case, the spilled flag is not consulted here */
+ actualReg = (ssTop()->registerr);
+ }
+ else {
+ storeToReg(ssTop(), preferredReg); /* load without popping; the entry stays on the simulated stack */
+ }
+ }
+ return actualReg;
+}
+
+static CogSimStackEntry *
+ssTop(void) /* Answer a pointer to the top entry of the simulated stack. */
+{
+ return simStackAt(simStackPtr);
+}
+
+static CogSimStackEntry
+ssTopDescriptor(void) /* Answer a copy (by value) of the top entry of the simulated stack. */
+{
+ return simStack[simStackPtr];
+}
+
+static CogSimStackEntry *
+ssValue(sqInt n) /* Answer a pointer to the entry n places below the top of the simulated stack (n = 0 is the top). */
+{
+ return simStackAt(simStackPtr - n);
+}
+
+static sqInt
 stackBytesForNumArgs(AbstractInstruction * self_in_stackBytesForNumArgs, sqInt numArgs)
 {
  return numArgs * 4;
@@ -12952,6 +14499,33 @@
  byteAtput(followingAddress - 4, literal & 255);
 }
 
+static void
+storeToReg(CogSimStackEntry * self_in_storeToReg, sqInt reg) /* Generate code that loads the entry's value into reg; unlike popToReg this never generates a pop. */
+{
+
+ switch ((self_in_storeToReg->type)) {
+ case SSBaseOffset:
+ case SSSpill: /* both kinds are read from offset relative to the entry's register */
+ gMoveMwrR((self_in_storeToReg->offset), (self_in_storeToReg->registerr), reg);
+ break;
+ case SSConstant: /* constants needing annotation use gMoveCwR and are annotated, the rest use gMoveCqR */
+ if (shouldAnnotateObjectReference((self_in_storeToReg->constant))) {
+ annotateobjRef(gMoveCwR((self_in_storeToReg->constant), reg), (self_in_storeToReg->constant));
+ }
+ else {
+ gMoveCqR((self_in_storeToReg->constant), reg);
+ }
+ break;
+ case SSRegister: /* nothing to generate when the value is already in reg */
+ if (reg != ((self_in_storeToReg->registerr))) {
+ gMoveRR((self_in_storeToReg->registerr), reg);
+ }
+ break;
+ default:
+ error("Case not found and no otherwise clause");
+ }
+}
+
 static sqInt
 sib(AbstractInstruction * self_in_sib, sqInt scale, sqInt indexReg, sqInt baseReg)
 {

Modified: branches/Cog/src/vm/cogit.h
===================================================================
--- branches/Cog/src/vm/cogit.h 2011-01-01 22:45:04 UTC (rev 2339)
+++ branches/Cog/src/vm/cogit.h 2011-01-01 22:51:31 UTC (rev 2340)
@@ -11,7 +11,12 @@
 sqInt canMapBytecodePCsToNativePCs(void);
 extern void (*ceCaptureCStackPointers)();
 sqInt ceCPICMissreceiver(CogMethod *cPIC, sqInt receiver);
+extern void (*ceEnter0ArgsPIC)();
+extern void (*ceEnter1ArgsPIC)();
+extern void (*ceEnter2ArgsPIC)();
 extern void (*ceEnterCogCodePopReceiverAndClassRegs)();
+extern void (*ceEnterCogCodePopReceiverArg0Regs)();
+extern void (*ceEnterCogCodePopReceiverArg1Arg0Regs)();
 extern void (*ceEnterCogCodePopReceiverReg)();
 sqInt ceSICMiss(sqInt receiver);
 void checkAssertsEnabledInCogit(void);
@@ -26,6 +31,8 @@
 void compactCogCompiledCode(void);
 void enterCogCodePopReceiver(void);
 void enterCogCodePopReceiverAndClassRegs(void);
+void enterCogCodePopReceiverArg0Regs(void);
+void enterCogCodePopReceiverArg1Arg0Regs(void);
 CogBlockMethod * findEnclosingMethodForinHomeMethod(sqInt mcpc, CogMethod *cogMethod);
 CogBlockMethod * findMethodForStartBcpcinHomeMethod(sqInt startbcpc, CogMethod *cogMethod);
 sqInt genQuickReturnConst(void);
@@ -76,7 +83,12 @@
 sqInt ceCannotResumeTrampoline;
 void (*ceCaptureCStackPointers)(void);
 sqInt ceCheckForInterruptTrampoline;
+void (*ceEnter0ArgsPIC)(void);
+void (*ceEnter1ArgsPIC)(void);
+void (*ceEnter2ArgsPIC)(void);
 void (*ceEnterCogCodePopReceiverAndClassRegs)(void);
+void (*ceEnterCogCodePopReceiverArg0Regs)(void);
+void (*ceEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*ceEnterCogCodePopReceiverReg)(void);
 unsigned long (*ceGetSP)(void);
 sqInt ceReturnToInterpreterTrampoline;
@@ -87,6 +99,8 @@
 sqInt cmNoCheckEntryOffset;
 unsigned long debugPrimCallStackOffset;
 void (*realCEEnterCogCodePopReceiverAndClassRegs)(void);
+void (*realCEEnterCogCodePopReceiverArg0Regs)(void);
+void (*realCEEnterCogCodePopReceiverArg1Arg0Regs)(void);
 void (*realCEEnterCogCodePopReceiverReg)(void);
 int traceLinkedSends ;
 sqInt traceStores;
@@ -101,7 +115,7 @@
 #define getCStackPointer() CStackPointer
 #define noCheckEntryOffset() cmNoCheckEntryOffset
 #define noContextSwitchBlockEntryOffset() blockNoContextSwitchOffset
-#define numRegArgs() 0
+#define numRegArgs() 1
 #define printOnTrace() (traceLinkedSends & 8)
 #define recordEventTrace() (traceLinkedSends & 4)
 #define recordPrimTrace() (traceLinkedSends & 2)