Hi All,
here's a challenge. Currently the JIT is naive about a sequence of inst var assignments. It treats each assignment separately, each getting its own copy of the store check. For example when initializing an Interval via: setFrom: startInteger to: stopInteger by: stepInteger start := startInteger. stop := stopInteger. step := stepInteger it generates the following assembly on ARM: A0D0 objhdr: 8000000A000035 nArgs: 3 type: 2 blksiz: 140 method: C03C10 mthhdr: 6180005 selctr: 6B71A0=#setFrom:to:by: blkentry: 0 stackCheckOffset: 0 cmRefersToYoung: no 0000a0ec: mov r7, #0 0000a0f0: push {lr} 0000a0f4: bl 0x000009f8 = 16r9F8 = ceMethodAbortNArgs 0000a0f8: ands r0, r0, #1 0000a0fc: b 0x0000a114 = 16rA114 = setFrom:to:by:@44 entry: 0000a100: ands r0, r7, #3 0000a104: bne 0x0000a0f8 = 16rA0F8 = setFrom:to:by:@28 0000a108: ldr r0, [r7] 0000a10c: mvn ip, #0 0000a110: ands r0, r0, ip, lsr #10 0000a114: cmp r0, r8 0000a118: bne 0x0000a0f0 = 16rA0F0 = setFrom:to:by:@20 noCheckEntry: 0000a11c: ldr r5, [sp, #8] startInteger 0000a120: str r5, [r7, #8] start 0000a124: mov r0, r5 0000a128: ands r0, r0, #3 startInteger immediate? 0000a12c: bne 0x0000a168 = 16rA168 = setFrom:to:by:@98 0000a130: mov r0, #0, 8 0000a134: orr r0, r0, #5308416 ; 0x510000 0000a138: orr r0, r0, #50944 ; 0xc700 0000a13c: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil 0000a140: cmp r7, r0 self in oldSpace? (self >= nil) 0000a144: bcc 0x0000a168 = 16rA168 = setFrom:to:by:@98 0000a148: cmp r5, r0 startInteger young? (startInteger < nil) 0000a14c: bcs 0x0000a168 = 16rA168 = setFrom:to:by:@98 0000a150: ldrb r0, [r7, #3] self in remembered table? 
0000a154: ands r0, r0, #32 0000a158: bne 0x0000a168 = 16rA168 = setFrom:to:by:@98 0000a15c: push {lr} add self to remembered table 0000a160: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline IsRelativeCall: 0000a164: pop {lr} 0000a168: ldr r5, [sp, #4] stopInteger 0000a16c: str r5, [r7, #12] 0000a170: mov r0, r5 0000a174: ands r0, r0, #3 0000a178: bne 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4 0000a17c: mov r0, #0, 8 0000a180: orr r0, r0, #5308416 ; 0x510000 0000a184: orr r0, r0, #50944 ; 0xc700 0000a188: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil 0000a18c: cmp r7, r0 0000a190: bcc 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4 0000a194: cmp r5, r0 0000a198: bcs 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4 0000a19c: ldrb r0, [r7, #3] 0000a1a0: ands r0, r0, #32 0000a1a4: bne 0x0000a1b4 = 16rA1B4 = setFrom:to:by:@E4 0000a1a8: push {lr} 0000a1ac: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline IsRelativeCall: 0000a1b0: pop {lr} 0000a1b4: ldr r5, [sp] stepInteger 0000a1b8: str r5, [r7, #16] 0000a1bc: mov r0, r5 0000a1c0: ands r0, r0, #3 0000a1c4: bne 0x0000a200 = 16rA200 = setFrom:to:by:@130 0000a1c8: mov r0, #0, 8 0000a1cc: orr r0, r0, #5308416 ; 0x510000 0000a1d0: orr r0, r0, #50944 ; 0xc700 0000a1d4: orr r0, r0, #136 ; 0x88 = 16r51C788 = nil 0000a1d8: cmp r7, r0 0000a1dc: bcc 0x0000a200 = 16rA200 = setFrom:to:by:@130 0000a1e0: cmp r5, r0 0000a1e4: bcs 0x0000a200 = 16rA200 = setFrom:to:by:@130 0000a1e8: ldrb r0, [r7, #3] 0000a1ec: ands r0, r0, #32 0000a1f0: bne 0x0000a200 = 16rA200 = setFrom:to:by:@130 0000a1f4: push {lr} 0000a1f8: bl 0x00000f88 = 16rF88 = ceStoreCheckTrampoline IsRelativeCall: 0000a1fc: pop {lr} 0000a200: add sp, sp, #16 ADD 16 to SP 0000a204: mov pc, lr 0000a208: nop ; (mov r0, r0) startpc: 12 16rA164 IsRelativeCall (16rA20F) 16rA1B0 IsRelativeCall (16rA20E) 16rA1FC IsRelativeCall (16rA20D) If the Cogit were smart enough to identify methods that contained only instance initialization code (push arg or push constant, storePop inst var, and return self) we could 
generate much more compact code, e.g. noCheckEntry: ldr r5, [sp, #8] startInteger str r5, [r7, #8] ldr r5, [sp, #4] stopInteger str r5, [r7, #12] ldr r5, [sp] stepInteger str r5, [r7, #16] mov r0, #0, 8 orr r0, r0, #5308416 ; 0x510000 orr r0, r0, #50944 ; 0xc700 orr r0, r0, #136 ; 0x88 = 16r51C788 = nil cmp r7, r0 self young? if so, jump to return bcc L4 ldr r5, [sp, #8] startInteger immediate? tests r5, r5, #3 bne L1 cmp r5, r0 startInteger old? bcs L1 L2: ldrb r0, [r7, #3] self in remembered table? ands r0, r0, #32 bne L1 push {lr} bl ceStoreCheckTrampoline IsRelativeCall: pop {lr} add sp, sp, #16 ADD 16 to SP mov pc, lr L1: ldr r5, [sp, #4] stopInteger tests r5, r5, #3 bne L3 cmp r5, r0 bcs L3 ldrb r0, [r7, #3] ands r0, r0, #32 beq L2 L3: ldr r5, [sp] stepInteger tests r5, r5, #3 bne L4 cmp r5, r0 bcs L4 ldrb r0, [r7, #3] ands r0, r0, #32 beq L2 L4: add sp, sp, #16 ADD 16 to SP mov pc, lr That's 5 times shorter, including method header, and does far less work in common cases (self is compared with nil, which is also the old/new boundary, only once, not once for every inst var), but in the case of an Interval using SmallIntegers may be slower because each variable is read from the stack twice. Interesting choices. But I think the code density would win. Of course, this may make no difference to overall speed, but it would be nice to know :-). If you're up for this, let me know. -- best,
Eliot |
Hi eliot > Hi All, > > here's a challenge. Currently the JIT is naive about a sequence of inst var assignments. It treats assignment separately, each getting its own copy of the store check. For example when initializing an Interval via: > > setFrom: startInteger to: stopInteger by: stepInteger > > start := startInteger. > stop := stopInteger. > step := stepInteger Is this pattern frequent? Because it was proposed by K. Beck but do people use it? Especially now with automatic invocation of initialize, we often have start := startInteger. > stop := stopInteger. > step := stepInteger self do. |
I think what you want Eliot is not necessarily related to frameless methods. I think what you want is that if there are several storeChecks in the same object in-between two interrupt points, which could be several inst var stores or several inlined at: put: on the same object, you want to merge the storeCheck logic so it's done in one place instead of multiple. In the case of frameless methods, there are no interrupt points, so it works for the whole method. However, I don't see why the technique would not apply if you write code such as: foo: arg1 "any code here that may include sends or anything" instVar1 := 1. instVar2 := arg1. instVar3 := #bar. "any code here that may include sends or anything" In this case you can merge the store check for the 3 inst vars as it's done in between two interrupt points. Currently, for each instVar store, the process is: - is the value stored an immediate? - is the object holding the instVar young? - is the value stored old? - is the object holding the instVar already remembered? If all 4 are false, the object holding the instVar needs to be added to the remembered table. And I guess the 2 tests on the object holding the inst var, 2 out of 4, could be done only once. In the future I guess that the write barrier will be more important because of the tricolor marking GC. As far as I understood, we'll have to check in addition if the object and the value are not black & white or white & black, in which case we need to color one of the operands to gray. So I guess an improvement there may be valuable. 2015-04-24 8:05 GMT+02:00 stephane ducasse <[hidden email]>:
|
Hi Clément,
|
Free forum by Nabble | Edit this page |