1. A guide to the CHICKEN compilation process
    1. Source
    2. Canonicalized
    3. CPS converted
    4. Optimization (1)
    5. Optimization (2)
    6. Optimization (3)
    7. Optimization (4)
    8. Optimization (5)
    9. Closure conversion
    10. Code generation
    11. Another example
  2. References

A guide to the CHICKEN compilation process

This document describes the compilation process used by the CHICKEN Scheme to C compiler by explaining the different compilation stages on a simple example program.

CHICKEN uses a compilation strategy called Cheney-on-the-MTA after a paper by Henry Baker[1]. The basic idea is quite simple: compile Scheme to C by first transforming a program into Continuation Passing Style (CPS) and then directly generate C code in CPS as functions that never return and call a stack-allocated continuation record instead (that also holds a pointer to the code of a continuation procedure). Allocation is done simply by creating data structures on the stack - since functions never return, the allocated data will stay "live". As this would build up stack-frames endlessly, the stack-pointer is checked at regular times (currently on every function entry) whether a predetermined limit is reached and once the limit is exceeded, the current arguments and continuation is saved and all live data is copied into the heap (effectively a second heap generation in a generational garbage collection scheme). This copying traverses all data that can be reached from the current dynamic state of the program (which is just the continuation, the arguments passed to the current procedure plus the current closure). Unreachable data is never touched and thus the time required for copying is proportional to the amount of live data on the stack.

Allocation can be extremely fast as we basically can use the machine's stack-pointer as a dedicated allocation pointer register. Another advantage is that through the CPS conversion (combined with a flat closure representation) a minimum of garbage is retained: only data in free variables that are guaranteed to be used is stored in continuation closure records, at a sub-procedure level. As continuations are explicit and inherent under this strategy, code that uses continuations heavily pays no performance penalty. This is particularly important when threads are implemented on top of continuations.

A disadvantage is that a lot of allocation takes place and that the CPS representation puts certain contraints on interfacing to foreign code (especially when callbacks are involved).

Ok, let's start with an example: the well-known N-queens problem. We present the code as it is transformed by the various compilation stages:

Source

;;; NQUEENS -- Compute number of solutions to 8-queens problem.

(define (nqueens n)

  (define (dec-to n)
    (let loop ((i n) (l '()))
      (if (= i 0) l (loop (- i 1) (cons i l)))))

  (define (try x y z)
    (if (null? x)
      (if (null? y)
        1
        0)
      (+ (if (ok? (car x) 1 z)
           (try (append (cdr x) y) '() (cons (car x) z))
           0)
         (try (cdr x) (cons (car x) y) z))))

  (define (ok? row dist placed)
    (if (null? placed)
      #t
      (and (not (= (car placed) (+ row dist)))
           (not (= (car placed) (- row dist)))
           (ok? row (+ dist 1) (cdr placed)))))

  (try (dec-to n) '() '()))

(nqueens 8)

Canonicalized

First comes canonicalization, which means macro expansion and some basic source normalization. Get this output using csc -debug 2:

(##core#callunit "library")
(##core#callunit "eval")
(##core#callunit "extras")
(##core#undefined)
(##core#undefined)
(set! nqueens
  (lambda (n0)
    (let ((dec-to1 (##core#undefined))
          (try2 (##core#undefined))
          (ok?3 (##core#undefined)))
      (let ((t15 (set! dec-to1
                   (lambda (n4)
                     (##core#app
                       (let ((loop5 (##core#undefined)))
                         (let ((t8 (set! loop5
                                     (lambda (i6 l7)
                                       (if (= i6 '0) l7 (loop5 (- i6 '1) (cons i6 l7)))))))
                           (let () loop5)))
                       n4
                       '())))))
        (let ((t16 (set! try2
                     (lambda (x9 y10 z11)
                       (if (null? x9)
                         (if (null? y10) '1 '0)
                         (+ (if (ok?3 (car x9) '1 z11)
                              (try2 (append (cdr x9) y10)
                                    '()
                                    (cons (car x9) z11))
                              '0)
                            (try2 (cdr x9) (cons (car x9) y10) z11)))))))
          (let ((t17 (set! ok?3
                       (lambda (row12 dist13 placed14)
                         (if (null? placed14)
                           '#t
                           (if (not (= (car placed14) (+ row12 dist13)))
                             (if (not (= (car placed14) (- row12 dist13)))
                               (ok?3 row12 (+ dist13 '1) (cdr placed14))
                               '#f)
                             '#f))))))
            (try2 (dec-to1 n0) '() '())))))))
(nqueens '8)
((##sys#implicit-exit-handler))
(##core#undefined)

You'll note some administrative forms at the start and the end of the program - they ensure termination will call some setup- and cleanup code. Toplevel-definitions are also replaced by assignment and lexical identifiers are renamed (alpha-converted).

CPS converted

The next step is converting to CPS - you see that this generates quite a lot of code - the code has been slightly re-formatted to fit into this page. Note that the compiler operates on an abstract syntax tree now, the s-expression notation can be reconstructed by entering csc -debug 3:

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((t19 r28))
  (let ((k30 (##core#lambda (r31)
  (let ((t20 r31))
  (let ((k33 (##core#lambda (r34)
  (let ((t21 r34))
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((dec-to1 (##core#undefined)))
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t39 (set! dec-to1
  (lambda (k41 n4)
  (let ((k42 (##core#lambda (r43) (k41 r43))))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (let ((k48 (##core#lambda (r49) (k47 r49))))
  (let ((k51 (##core#lambda (r52)
  (if r52
  (k48 l7)
  (let ((k54 (##core#lambda (r55) (k48 r55))))
  (let ((k58 (##core#lambda (r59)
  (let ((a57 r59))
  (let ((k62 (##core#lambda (r63)
  (let ((a61 r63)) (loop5 k54 a57 a61)))))
  (cons k62 i6 l7))))))
  (- k58 i6 '1)))))))
  (= k51 i6 '0)))))))
  (let ((t8 t45)) (loop5 k42 n4 '())))))))))
  (let ((t15 t39))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (let ((k68 (##core#lambda (r69) (k67 r69))))
  (let ((k71 (##core#lambda (r72)
  (if r72
  (let ((k74 (##core#lambda (r75) (k68 r75))))
  (let ((k77 (##core#lambda (r78) (if r78 (k74 '1) (k74 '0)))))
  (null? k77 y10)))
  (let ((k80 (##core#lambda (r81) (k68 r81))))
  (let ((k84 (##core#lambda (r85)
  (let ((a83 r85))
  (let ((k88 (##core#lambda (r89)
  (let ((a87 r89)) (+ k80 a83 a87)))))
  (let ((k92 (##core#lambda (r93)
  (let ((a91 r93))
  (let ((k96 (##core#lambda (r97)
  (let ((a95 r97)) (try2 k88 a91 a95 z11)))))
  (let ((k100 (##core#lambda (r101)
  (let ((a99 r101)) (cons k96 a99 y10)))))
  (car k100 x9)))))))
  (cdr k92 x9)))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k106 (##core#lambda (r107) (k84 r107))))
  (let ((k110 (##core#lambda (r111)
  (let ((a109 r111))
  (let ((k114 (##core#lambda (r115)
  (let ((a113 r115)) (try2 k106 a109 '() a113)))))
  (let ((k118 (##core#lambda (r119)
  (let ((a117 r119)) (cons k114 a117 z11)))))
  (car k118 x9)))))))
  (let ((k122 (##core#lambda (r123)
  (let ((a121 r123)) (append k110 a121 y10)))))
  (cdr k122 x9))))
  (k84 '0)))))
  (let ((k126 (##core#lambda (r127)
  (let ((a125 r127)) (ok?3 k103 a125 '1 z11)))))
  (car k126 x9)))))))))
  (null? k71 x9)))))))
  (let ((t16 t65))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (let ((k132 (##core#lambda (r133) (k131 r133))))
  (let ((k135 (##core#lambda (r136)
  (if r136
  (k132 '#t)
  (let ((k138 (##core#lambda (r139) (k132 r139))))
  (let ((k141 (##core#lambda (r142)
  (if r142
  (let ((k144 (##core#lambda (r145) (k138 r145))))
  (let ((k147 (##core#lambda (r148)
  (if r148
  (let ((k150 (##core#lambda (r151) (k144 r151))))
  (let ((k154 (##core#lambda (r155)
  (let ((a153 r155))
  (let ((k158 (##core#lambda (r159)
  (let ((a157 r159)) (ok?3 k150 row12 a153 a157)))))
  (cdr k158 placed14))))))
  (+ k154 dist13 '1)))
  (k144 '#f)))))
  (let ((k162 (##core#lambda (r163)
  (let ((a161 r163)) (not k147 a161)))))
  (let ((k166 (##core#lambda (r167)
  (let ((a165 r167))
  (let ((k170 (##core#lambda (r171)
  (let ((a169 r171)) (= k162 a165 a169)))))
  (- k170 row12 dist13))))))
  (car k166 placed14)))))
  (k138 '#f)))))
  (let ((k174 (##core#lambda (r175)
  (let ((a173 r175)) (not k141 a173)))))
  (let ((k178 (##core#lambda (r179)
  (let ((a177 r179))
  (let ((k182 (##core#lambda (r183)
  (let ((a181 r183)) (= k174 a177 a181)))))
  (+ k182 row12 dist13))))))
  (car k178 placed14)))))))))
  (null? k135 placed14)))))))
  (let ((t17 t129))
  (let ((k185 (##core#lambda (r186) (k38 r186))))
  (let ((k189 (##core#lambda (r190)
  (let ((a188 r190)) (try2 k185 a188 '() '())))))
  (dec-to1 k189 n0))))))))))))))))
  (let ((t22 t36))
  (let ((k192 (##core#lambda (r193)
  (let ((t23 r193))
  (let ((k195 (##core#lambda (r196)
  (let ((t24 r196)) (k26 (##core#undefined))))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198)))))))
  (nqueens k192 '8))))))))
  (##core#callunit "extras" k33))))))
  (##core#callunit "eval" k30))))))
  (##core#callunit "library" k27)))

##core#lambda refers to lambda forms introduced by the CPS conversion. ##core#undefined is an internal variant of void.

Optimization (1)

Optimization is now performed, iteratively until the program is stable: two occurrences of not have been removed by exchanging the branches of the conditional operator (if).

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((t19 r28))
  (let ((k30 (##core#lambda (r31)
  (let ((t20 r31))
  (let ((k33 (##core#lambda (r34)
  (let ((t21 r34))
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((dec-to1 (##core#undefined)))
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t39 (set! dec-to1
  (lambda (k41 n4)
  (let ((k42 (##core#lambda (r43) (k41 r43))))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (let ((k48 (##core#lambda (r49) (k47 r49))))
  (let ((k51 (##core#lambda (r52)
  (if r52
  (k48 l7)
  (let ((k54 (##core#lambda (r55) (k48 r55))))
  (let ((k58 (##core#lambda (r59)
  (let ((a57 r59))
  (let ((k62 (##core#lambda (r63)
  (let ((a61 r63)) (loop5 k54 a57 a61)))))
  (cons k62 i6 l7))))))
  (- k58 i6 '1)))))))
  (= k51 i6 '0)))))))
  (let ((t8 t45)) (loop5 k42 n4 '())))))))))
  (let ((t15 t39))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (let ((k68 (##core#lambda (r69) (k67 r69))))
  (let ((k71 (##core#lambda (r72)
  (if r72
  (let ((k74 (##core#lambda (r75) (k68 r75))))
  (let ((k77 (##core#lambda (r78) (if r78 (k74 '1) (k74 '0)))))
  (null? k77 y10)))
  (let ((k80 (##core#lambda (r81) (k68 r81))))
  (let ((k84 (##core#lambda (r85)
  (let ((a83 r85))
  (let ((k88 (##core#lambda (r89)
  (let ((a87 r89)) (+ k80 a83 a87)))))
  (let ((k92 (##core#lambda (r93)
  (let ((a91 r93))
  (let ((k96 (##core#lambda (r97)
  (let ((a95 r97)) (try2 k88 a91 a95 z11)))))
  (let ((k100 (##core#lambda (r101)
  (let ((a99 r101)) (cons k96 a99 y10)))))
  (car k100 x9)))))))
  (cdr k92 x9)))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k106 (##core#lambda (r107) (k84 r107))))
  (let ((k110 (##core#lambda (r111)
  (let ((a109 r111))
  (let ((k114 (##core#lambda (r115)
  (let ((a113 r115)) (try2 k106 a109 '() a113)))))
  (let ((k118 (##core#lambda (r119)
  (let ((a117 r119)) (cons k114 a117 z11)))))
  (car k118 x9)))))))
  (let ((k122 (##core#lambda (r123)
  (let ((a121 r123)) (append k110 a121 y10)))))
  (cdr k122 x9))))
  (k84 '0)))))
  (let ((k126 (##core#lambda (r127)
  (let ((a125 r127)) (ok?3 k103 a125 '1 z11)))))
  (car k126 x9)))))))))
  (null? k71 x9)))))))
  (let ((t16 t65))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (let ((k132 (##core#lambda (r133) (k131 r133))))
  (let ((k135 (##core#lambda (r136)
  (if r136
  (k132 '#t)
  (let ((k138 (##core#lambda (r139) (k132 r139))))
  (let ((k141 (##core#lambda (r142)
  (if r142
  (k138 '#f)
  (let ((k144 (##core#lambda (r145) (k138 r145))))
  (let ((k147 (##core#lambda (r148)
  (if r148
  (k144 '#f)
  (let ((k150 (##core#lambda (r151) (k144 r151))))
  (let ((k154 (##core#lambda (r155)
  (let ((a153 r155))
  (let ((k158 (##core#lambda (r159)
  (let ((a157 r159)) (ok?3 k150 row12 a153 a157)))))
  (cdr k158 placed14))))))
  (+ k154 dist13 '1)))))))
  (let ((k162 (##core#lambda (r163)
  (let ((a161 r163)) (k147 a161)))))
  (let ((k166 (##core#lambda (r167)
  (let ((a165 r167))
  (let ((k170 (##core#lambda (r171)
  (let ((a169 r171)) (= k162 a165 a169)))))
  (- k170 row12 dist13))))))
  (car k166 placed14)))))))))
  (let ((k174 (##core#lambda (r175)
  (let ((a173 r175)) (k141 a173)))))
  (let ((k178 (##core#lambda (r179)
  (let ((a177 r179))
  (let ((k182 (##core#lambda (r183)
  (let ((a181 r183)) (= k174 a177 a181)))))
  (+ k182 row12 dist13))))))
  (car k178 placed14)))))))))
  (null? k135 placed14)))))))
  (let ((t17 t129))
  (let ((k185 (##core#lambda (r186) (k38 r186))))
  (let ((k189 (##core#lambda (r190)
  (let ((a188 r190)) (try2 k185 a188 '() '())))))
  (dec-to1 k189 n0))))))))))))))))
  (let ((t22 t36))
  (let ((k192 (##core#lambda (r193)
  (let ((t23 r193))
  (let ((k195 (##core#lambda (r196)
  (let ((t24 r196)) (k26 (##core#undefined))))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198)))))))
  (nqueens k192 '8))))))))
  (##core#callunit "extras" k33))))))
  (##core#callunit "eval" k30))))))
  (##core#callunit "library" k27)))

Optimization (2)

Next round. This time k141, k147 and dec-to1 have been contracted, which means inlining procedures that are called only once (an optimization that guarantees the program will not grow). Some variables and bindings have been removed as they are unnecessary:

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((k30 (##core#lambda (r31)
  (let ((k33 (##core#lambda (r34)
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t39 (##core#undefined)))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (let ((k71 (##core#lambda (r72)
  (if r72
  (let ((k77 (##core#lambda (r78) (if r78 (k67 '1) (k67 '0)))))
  (null? k77 y10))
  (let ((k84 (##core#lambda (r85)
  (let ((k88 (##core#lambda (r89) (+ k67 r85 r89))))
  (let ((k92 (##core#lambda (r93)
  (let ((k96 (##core#lambda (r97) (try2 k88 r93 r97 z11))))
  (let ((k100 (##core#lambda (r101) (cons k96 r101 y10))))
  (car k100 x9))))))
  (cdr k92 x9))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k110 (##core#lambda (r111)
  (let ((k114 (##core#lambda (r115) (try2 k84 r111 '() r115))))
  (let ((k118 (##core#lambda (r119) (cons k114 r119 z11))))
  (car k118 x9))))))
  (let ((k122 (##core#lambda (r123) (append k110 r123 y10))))
  (cdr k122 x9)))
  (k84 '0)))))
  (let ((k126 (##core#lambda (r127) (ok?3 k103 r127 '1 z11))))
  (car k126 x9))))))))
  (null? k71 x9))))))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (let ((k135 (##core#lambda (r136)
  (if r136
  (k131 '#t)
  (let ((k174 (##core#lambda (r175)
  (let ((r142 r175))
  (if r142
  (k131 '#f)
  (let ((k162 (##core#lambda (r163)
  (let ((r148 r163))
  (if r148
  (k131 '#f)
  (let ((k154 (##core#lambda (r155)
  (let ((k158 (##core#lambda (r159)
  (ok?3 k131 row12 r155 r159))))
  (cdr k158 placed14)))))
  (+ k154 dist13 '1)))))))
  (let ((k166 (##core#lambda (r167)
  (let ((k170 (##core#lambda (r171) (= k162 r167 r171))))
  (- k170 row12 dist13)))))
  (car k166 placed14))))))))
  (let ((k178 (##core#lambda (r179)
  (let ((k182 (##core#lambda (r183) (= k174 r179 r183))))
  (+ k182 row12 dist13)))))
  (car k178 placed14)))))))
  (null? k135 placed14))))))
  (let ((k189 (##core#lambda (r190) (try2 k38 r190 '() '()))))
  (let ((k41 k189))
  (let ((n4 n0))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (let ((k51 (##core#lambda (r52)
  (if r52
  (k47 l7)
  (let ((k58 (##core#lambda (r59)
  (let ((k62 (##core#lambda (r63) (loop5 k47 r59 r63))))
  (cons k62 i6 l7)))))
  (- k58 i6 '1))))))
  (= k51 i6 '0))))))
  (loop5 k41 n4 '())))))))))))))))
  (let ((k192 (##core#lambda (r193)
  (let ((k195 (##core#lambda (r196) (k26 (##core#undefined)))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198))))))
  (nqueens k192 '8))))))
  (##core#callunit "extras" k33)))))
  (##core#callunit "eval" k30)))))
  (##core#callunit "library" k27)))

Optimization (3)

More elimination of local variables:

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((k30 (##core#lambda (r31)
  (let ((k33 (##core#lambda (r34)
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (let ((k71 (##core#lambda (r72)
  (if r72
  (let ((k77 (##core#lambda (r78) (if r78 (k67 '1) (k67 '0)))))
  (null? k77 y10))
  (let ((k84 (##core#lambda (r85)
  (let ((k88 (##core#lambda (r89) (+ k67 r85 r89))))
  (let ((k92 (##core#lambda (r93)
  (let ((k96 (##core#lambda (r97) (try2 k88 r93 r97 z11))))
  (let ((k100 (##core#lambda (r101) (cons k96 r101 y10))))
  (car k100 x9))))))
  (cdr k92 x9))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k110 (##core#lambda (r111)
  (let ((k114 (##core#lambda (r115) (try2 k84 r111 '() r115))))
  (let ((k118 (##core#lambda (r119) (cons k114 r119 z11))))
  (car k118 x9))))))
  (let ((k122 (##core#lambda (r123) (append k110 r123 y10))))
  (cdr k122 x9)))
  (k84 '0)))))
  (let ((k126 (##core#lambda (r127) (ok?3 k103 r127 '1 z11))))
  (car k126 x9))))))))
  (null? k71 x9))))))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (let ((k135 (##core#lambda (r136)
  (if r136
  (k131 '#t)
  (let ((k174 (##core#lambda (r175)
  (if r175
  (k131 '#f)
  (let ((k162 (##core#lambda (r163)
  (if r163
  (k131 '#f)
  (let ((k154 (##core#lambda (r155)
  (let ((k158 (##core#lambda (r159)
  (ok?3 k131 row12 r155 r159))))
  (cdr k158 placed14)))))
  (+ k154 dist13 '1))))))
  (let ((k166 (##core#lambda (r167)
  (let ((k170 (##core#lambda (r171) (= k162 r167 r171))))
  (- k170 row12 dist13)))))
  (car k166 placed14)))))))
  (let ((k178 (##core#lambda (r179)
  (let ((k182 (##core#lambda (r183) (= k174 r179 r183))))
  (+ k182 row12 dist13)))))
  (car k178 placed14)))))))
  (null? k135 placed14))))))
  (let ((k189 (##core#lambda (r190) (try2 k38 r190 '() '()))))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (let ((k51 (##core#lambda (r52)
  (if r52
  (k47 l7)
  (let ((k58 (##core#lambda (r59)
  (let ((k62 (##core#lambda (r63) (loop5 k47 r59 r63))))
  (cons k62 i6 l7)))))
  (- k58 i6 '1))))))
  (= k51 i6 '0))))))
  (loop5 k189 n0 '()))))))))))))
  (let ((k192 (##core#lambda (r193)
  (let ((k195 (##core#lambda (r196) (k26 (##core#undefined)))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198))))))
  (nqueens k192 '8))))))
  (##core#callunit "extras" k33)))))
  (##core#callunit "eval" k30)))))
  (##core#callunit "library" k27)))

Optimization (4)

After this pass no more basic optimizations have been performed, so we enable simplification of builtin primitives by replacing them with more primitive forms:

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((k30 (##core#lambda (r31)
  (let ((k33 (##core#lambda (r34)
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (let ((k71 (##core#lambda (r72)
  (if r72
  (let ((k77 (##core#lambda (r78)
  (k67 (##core#cond r78 '1 '0)))))
  (k77 (##core#inline "C_i_nullp" y10)))
  (let ((k84 (##core#lambda (r85)
  (let ((k88 (##core#lambda (r89)
  (k67 (##core#inline_allocate "C_a_i_plus" 4 r85 r89)))))
  (let ((k92 (##core#lambda (r93)
  (let ((k96 (##core#lambda (r97) (try2 k88 r93 r97 z11))))
  (let ((k100 (##core#lambda (r101)
  (k96 (##core#inline_allocate "C_a_i_cons" 3 r101 y10)))))
  (k100 (##core#inline "C_i_car" x9)))))))
  (k92 (##core#inline "C_i_cdr" x9)))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k110 (##core#lambda (r111)
  (let ((k114 (##core#lambda (r115) (try2 k84 r111 '() r115))))
  (let ((k118 (##core#lambda (r119)
  (k114 (##core#inline_allocate "C_a_i_cons" 3 r119 z11)))))
  (k118 (##core#inline "C_i_car" x9)))))))
  (let ((k122 (##core#lambda (r123) (append k110 r123 y10))))
  (k122 (##core#inline "C_i_cdr" x9))))
  (k84 '0)))))
  (let ((k126 (##core#lambda (r127) (ok?3 k103 r127 '1 z11))))
  (k126 (##core#inline "C_i_car" x9)))))))))
  (k71 (##core#inline "C_i_nullp" x9)))))))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (let ((k135 (##core#lambda (r136)
  (if r136
  (k131 '#t)
  (let ((k174 (##core#lambda (r175)
  (if r175
  (k131 '#f)
  (let ((k162 (##core#lambda (r163)
  (if r163
  (k131 '#f)
  (let ((k154 (##core#lambda (r155)
  (let ((k158 (##core#lambda (r159)
  (ok?3 k131 row12 r155 r159))))
  (k158 (##core#inline "C_i_cdr" placed14))))))
  (k154 (##core#inline_allocate "C_a_i_plus" 4 dist13 '1)))))))
  (let ((k166 (##core#lambda (r167)
  (let ((k170 (##core#lambda (r171)
  (k162 (##core#inline "C_i_nequalp" r167 r171)))))
  (k170 (##core#inline_allocate "C_a_i_minus" 4 row12 dist13))))))
  (k166 (##core#inline "C_i_car" placed14))))))))
  (let ((k178 (##core#lambda (r179)
  (let ((k182 (##core#lambda (r183)
  (k174 (##core#inline "C_i_nequalp" r179 r183)))))
  (k182 (##core#inline_allocate "C_a_i_plus" 4 row12 dist13))))))
  (k178 (##core#inline "C_i_car" placed14))))))))
  (k135 (##core#inline "C_i_nullp" placed14)))))))
  (let ((k189 (##core#lambda (r190) (try2 k38 r190 '() '()))))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (let ((k51 (##core#lambda (r52)
  (if r52
  (k47 l7)
  (let ((k58 (##core#lambda (r59)
  (let ((k62 (##core#lambda (r63) (loop5 k47 r59 r63))))
  (k62 (##core#inline_allocate "C_a_i_cons" 3 i6 l7))))))
  (k58 (##core#inline_allocate "C_a_i_minus" 4 i6 '1)))))))
  (k51 (##core#inline "C_i_nequalp" i6 '0)))))))
  (loop5 k189 n0 '()))))))))))))
  (let ((k192 (##core#lambda (r193)
  (let ((k195 (##core#lambda (r196) (k26 (##core#undefined)))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198))))))
  (nqueens k192 '8))))))
  (##core#callunit "extras" k33)))))
  (##core#callunit "eval" k30)))))
  (##core#callunit "library" k27)))

Optimization (5)

More contractions, which have now been enabled by the previous optimizations, additionally some bindings could be removed:

(lambda (k26)
  (let ((k27 (##core#lambda (r28)
  (let ((k30 (##core#lambda (r31)
  (let ((k33 (##core#lambda (r34)
  (let ((t36 (set! nqueens
  (lambda (k38 n0)
  (let ((try2 (##core#undefined)))
  (let ((ok?3 (##core#undefined)))
  (let ((t65 (set! try2
  (lambda (k67 x9 y10 z11)
  (if (##core#inline "C_i_nullp" x9)
  (let ((r78 (##core#inline "C_i_nullp" y10)))
  (k67 (##core#cond r78 '1 '0)))
  (let ((k84 (##core#lambda (r85)
  (let ((k88 (##core#lambda (r89)
  (k67 (##core#inline_allocate "C_a_i_plus" 4 r85 r89)))))
  (let ((r93 (##core#inline "C_i_cdr" x9)))
  (let ((r101 (##core#inline "C_i_car" x9)))
  (let ((r97 (##core#inline_allocate "C_a_i_cons" 3 r101 y10)))
  (try2 k88 r93 r97 z11))))))))
  (let ((k103 (##core#lambda (r104)
  (if r104
  (let ((k110 (##core#lambda (r111)
  (let ((r119 (##core#inline "C_i_car" x9)))
  (let ((r115 (##core#inline_allocate "C_a_i_cons" 3 r119 z11)))
  (try2 k84 r111 '() r115))))))
  (let ((r123 (##core#inline "C_i_cdr" x9)))
  (append k110 r123 y10)))
  (k84 '0)))))
  (let ((r127 (##core#inline "C_i_car" x9)))
  (ok?3 k103 r127 '1 z11)))))))))
  (let ((t129 (set! ok?3
  (lambda (k131 row12 dist13 placed14)
  (if (##core#inline "C_i_nullp" placed14)
  (k131 '#t)
  (let ((r179 (##core#inline "C_i_car" placed14)))
  (let ((r183 (##core#inline_allocate "C_a_i_plus" 4 row12 dist13)))
  (if (##core#inline "C_i_nequalp" r179 r183)
  (k131 '#f)
  (let ((r167 (##core#inline "C_i_car" placed14)))
  (let ((r171 (##core#inline_allocate "C_a_i_minus" 4 row12 dist13)))
  (if (##core#inline "C_i_nequalp" r167 r171)
  (k131 '#f)
  (let ((r155 (##core#inline_allocate "C_a_i_plus" 4 dist13 '1)))
  (let ((r159 (##core#inline "C_i_cdr" placed14)))
  (ok?3 k131 row12 r155 r159))))))))))))))
  (let ((k189 (##core#lambda (r190) (try2 k38 r190 '() '()))))
  (let ((loop5 (##core#undefined)))
  (let ((t45 (set! loop5
  (lambda (k47 i6 l7)
  (if (##core#inline "C_i_nequalp" i6 '0)
  (k47 l7)
  (let ((r59 (##core#inline_allocate "C_a_i_minus" 4 i6 '1)))
  (let ((r63 (##core#inline_allocate "C_a_i_cons" 3 i6 l7)))
  (loop5 k47 r59 r63))))))))
  (loop5 k189 n0 '()))))))))))))
  (let ((k192 (##core#lambda (r193)
  (let ((k195 (##core#lambda (r196) (k26 (##core#undefined)))))
  (let ((k198 (##core#lambda (r199) (r199 k195))))
  (##sys#implicit-exit-handler k198))))))
  (nqueens k192 '8))))))
  (##core#callunit "extras" k33)))))
  (##core#callunit "eval" k30)))))
  (##core#callunit "library" k27)))

Closure conversion

Now procedures are transformed into explicit creation and access code for closures:

(##core#closure (2)
  (lambda (c239 k26)
  (let ((k27 (##core#closure (2)
  (##core#lambda (c241 r28)
  (let ((k30 (##core#closure (2)
  (##core#lambda (c243 r31)
  (let ((k33 (##core#closure (2)
  (##core#lambda (c245 r34)
  (let ((t36 (set! nqueens
  (##core#closure (2)
  (lambda (c247 k38 n0)
  (let ((try2248 (##core#undefined)))
  (let ((try2 (##core#box try2248)))
  (let ((ok?3249 (##core#undefined)))
  (let ((ok?3 (##core#box ok?3249)))
  (let ((t65 (##core#updatebox
  try2
  (##core#closure (4)
  (lambda (c251 k67 x9 y10 z11)
  (if (##core#inline "C_i_nullp" x9)
  (let ((r78 (##core#inline "C_i_nullp" y10)))
  (k67 (##core#cond r78 '1 '0)))
  (let ((k84 (##core#closure (6)
  (##core#lambda (c254 r85)
  (let ((k88 (##core#closure (3)
  (##core#lambda (c256 r89)
  ((##core#ref c256 (2))
  (##core#inline_allocate "C_a_i_plus" 4 (##core#ref c256 (1)) r89)))
  r85
  (##core#ref c254 (5)))))
  (let ((r93 (##core#inline "C_i_cdr" (##core#ref c254 (4)))))
  (let ((r101 (##core#inline "C_i_car" (##core#ref c254 (4)))))
  (let ((r97 (##core#inline_allocate "C_a_i_cons" 3 r101 (##core#ref c254 (3)))))
  ((##core#unbox (##core#ref c254 (2)) ())
  k88
  r93
  r97
  (##core#ref c254 (1))))))))
  z11
  (##core#ref c251 (2))
  y10
  x9
  k67)))
  (let ((k103 (##core#closure (6)
  (##core#lambda (c261 r104)
  (if r104
  (let ((k110 (##core#closure (5)
  (##core#lambda (c263 r111)
  (let ((r119 (##core#inline "C_i_car" (##core#ref c263 (4)))))
  (let ((r115 (##core#inline_allocate "C_a_i_cons" 3 r119 (##core#ref c263 (3)))))
  ((##core#unbox (##core#ref c263 (2)) ())
  (##core#ref c263 (1))
  r111
  '()
  r115))))
  (##core#ref c261 (2))
  (##core#ref c261 (3))
  (##core#ref c261 (4))
  (##core#ref c261 (5)))))
  (let ((r123 (##core#inline "C_i_cdr" (##core#ref c261 (5)))))
  (append k110 r123 (##core#ref c261 (1)))))
  ((##core#ref c261 (2)) '0)))
  y10
  k84
  (##core#ref c251 (2))
  z11
  x9)))
  (let ((r127 (##core#inline "C_i_car" x9)))
  ((##core#unbox (##core#ref c251 (1)) ())
  k103
  r127
  '1
  z11))))))
  ok?3
  try2
  '#<lambda info (try x9 y10 z11)#>))))
  (let ((t129 (##core#updatebox
  ok?3
  (##core#closure
  (3)
  (lambda (c269 k131 row12 dist13 placed14)
  (if (##core#inline "C_i_nullp" placed14)
  (k131 '#t)
  (let ((r179 (##core#inline "C_i_car" placed14)))
  (let ((r183 (##core#inline_allocate "C_a_i_plus" 4 row12 dist13)))
  (if (##core#inline "C_i_nequalp" r179 r183)
  (k131 '#f)
  (let ((r167 (##core#inline "C_i_car" placed14)))
  (let ((r171 (##core#inline_allocate "C_a_i_minus" 4 row12 dist13)))
  (if (##core#inline "C_i_nequalp" r167 r171)
  (k131 '#f)
  (let ((r155 (##core#inline_allocate "C_a_i_plus" 4 dist13 '1)))
  (let ((r159 (##core#inline "C_i_cdr" placed14)))
  ((##core#unbox (##core#ref c269 (1)) ())
  k131
  row12
  r155
  r159)))))))))))
  ok?3
  '#<lambda info (ok? row12 dist13 placed14)#>))))
  (let ((k189 (##core#closure (3)
  (##core#lambda (c277 r190)
  ((##core#unbox (##core#ref c277 (2)) ())
  (##core#ref c277 (1))
  r190
  '()
  '()))
  k38
  try2)))
  (let ((loop5278 (##core#undefined)))
  (let ((loop5 (##core#box loop5278)))
  (let ((t45 (##core#updatebox
  loop5
  (##core#closure (3)
  (lambda (c280 k47 i6 l7)
  (if (##core#inline "C_i_nequalp" i6 '0)
  (k47 l7)
  (let ((r59 (##core#inline_allocate "C_a_i_minus" 4 i6 '1)))
  (let ((r63 (##core#inline_allocate "C_a_i_cons" 3 i6 l7)))
  ((##core#unbox (##core#ref c280 (1)) ())
  k47
  r59
  r63)))))
  loop5
  '#<lambda info (loop i6 l7)#>))))
  ((##core#unbox loop5 ()) k189 n0 '()))))))))))))
  '#<lambda info (nqueens n0)#>))))
  (let ((k192 (##core#closure (2)
  (##core#lambda (c284 r193)
  (let ((k195 (##core#closure (2)
  (##core#lambda (c286 r196)
  ((##core#ref c286 (1)) (##core#undefined)))
  (##core#ref c284 (1)))))
  (let ((k198 (##core#closure (2)
  (##core#lambda (c288 r199)
  (r199 (##core#ref c288 (1))))
  k195)))
  (##sys#implicit-exit-handler k198))))
  (##core#ref c245 (1)))))
  (nqueens k192 '8))))
  (##core#ref c243 (1)))))
  (##core#callunit "extras" k33)))
  (##core#ref c241 (1)))))
  (##core#callunit "eval" k30)))
  k26)))
  (##core#callunit "library" k27)))
'#<lambda info (toplevel)#>)

Code generation

Here the output of the compiler in it's full glory. First the header:

/* Generated from nqueens.scm by the CHICKEN compiler
   http://www.call-with-current-continuation.org
   2007-05-27 22:05
   Version 2.615 - macosx-unix-gnu-ppc - [ libffi dload ptables applyhook ]
   command line: nqueens.scm -output-file nqueens.c -quiet -optimize-level 2
   used units: library eval extras
*/

#include "chicken.h"

Now function prototypes for library units that we use follow (these are the default library units used):

static C_PTABLE_ENTRY *create_ptable(void);
C_noret_decl(C_library_toplevel)
C_externimport void C_ccall C_library_toplevel(C_word c,C_word d,C_word k) C_noret;
C_noret_decl(C_eval_toplevel)
C_externimport void C_ccall C_eval_toplevel(C_word c,C_word d,C_word k) C_noret;
C_noret_decl(C_extras_toplevel)
C_externimport void C_ccall C_extras_toplevel(C_word c,C_word d,C_word k) C_noret;

The static global lf holds literal (constant) data that should not be garbage collected and is used in the compiled program. It also holds non-exported (hidden) toplevel variables:

static C_TLS C_word lf[8];

More prototypes, this time for the functions generated:

C_noret_decl(C_toplevel)
C_externexport void C_ccall C_toplevel(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_29)
static void C_ccall f_29(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_32)
static void C_ccall f_32(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_35)
static void C_ccall f_35(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_194)
static void C_ccall f_194(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_200)
static void C_ccall f_200(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_197)
static void C_ccall f_197(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_37)
static void C_ccall f_37(C_word c,C_word t0,C_word t1,C_word t2) C_noret;
C_noret_decl(f_46)
static void C_fcall f_46(C_word t0,C_word t1,C_word t2,C_word t3) C_noret;
C_noret_decl(f_191)
static void C_ccall f_191(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_130)
static void C_fcall f_130(C_word t0,C_word t1,C_word t2,C_word t3,C_word t4) C_noret;
C_noret_decl(f_66)
static void C_fcall f_66(C_word t0,C_word t1,C_word t2,C_word t3,C_word t4) C_noret;
C_noret_decl(f_105)
static void C_ccall f_105(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_112)
static void C_ccall f_112(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_86)
static void C_ccall f_86(C_word c,C_word t0,C_word t1) C_noret;
C_noret_decl(f_90)
static void C_ccall f_90(C_word c,C_word t0,C_word t1) C_noret;

Here the trampolines are declared and defined. For every generated C function from the CPS representation, we need a trampoline function that has a fixed calling convention and can be passed to the garbage collector when the stack is exhausted (the allocation limit we mentioned above). The trampoline for a given function will call the original function with the restored arguments, continuation and closure record. The trampolines starting with trf_ are custom: the associated functions have been detected during optimization to be customizable and follow a slightly different calling convention and thus need a specific trampoline. The trampolines starting with tr_ are general ones and can be re-used for all functions with the matching number of arguments.

C_noret_decl(trf_46)
static void C_fcall trf_46(void *dummy) C_regparm C_noret;
C_regparm static void C_fcall trf_46(void *dummy){
/* pick arguments and continuation from "temporary stack": */
C_word t3=C_pick(0);
C_word t2=C_pick(1);
C_word t1=C_pick(2);
C_word t0=C_pick(3);
/* pop values from temporary stack: */
C_adjust_stack(-4);
f_46(t0,t1,t2,t3);}

C_noret_decl(trf_130)
static void C_fcall trf_130(void *dummy) C_regparm C_noret;
C_regparm static void C_fcall trf_130(void *dummy){
C_word t4=C_pick(0);
C_word t3=C_pick(1);
C_word t2=C_pick(2);
C_word t1=C_pick(3);
C_word t0=C_pick(4);
C_adjust_stack(-5);
f_130(t0,t1,t2,t3,t4);}

C_noret_decl(trf_66)
static void C_fcall trf_66(void *dummy) C_regparm C_noret;
C_regparm static void C_fcall trf_66(void *dummy){
C_word t4=C_pick(0);
C_word t3=C_pick(1);
C_word t2=C_pick(2);
C_word t1=C_pick(3);
C_word t0=C_pick(4);
C_adjust_stack(-5);
f_66(t0,t1,t2,t3,t4);}

C_noret_decl(tr3)
static void C_fcall tr3(C_proc3 k) C_regparm C_noret;
C_regparm static void C_fcall tr3(C_proc3 k){
C_word t2=C_pick(0);
C_word t1=C_pick(1);
C_word t0=C_pick(2);
C_adjust_stack(-3);
(k)(3,t0,t1,t2);}

C_noret_decl(tr2)
static void C_fcall tr2(C_proc2 k) C_regparm C_noret;
C_regparm static void C_fcall tr2(C_proc2 k){
C_word t1=C_pick(0);
C_word t0=C_pick(1);
C_adjust_stack(-2);
(k)(2,t0,t1);}

Here comes the toplevel, the procedure holding the compiled toplevel expressions that are not contained in user procedures.

But first the trampoline for the toplevel:

/* toplevel */
static C_TLS int toplevel_initialized=0;
C_main_entry_point
C_noret_decl(toplevel_trampoline)
static void C_fcall toplevel_trampoline(void *dummy) C_regparm C_noret;
C_regparm static void C_fcall toplevel_trampoline(void *dummy){
/* just invoke toplevel with continuation: */
C_toplevel(2,C_SCHEME_UNDEFINED,C_restore);}

And now the body:

void C_ccall C_toplevel(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word *a;
/* was this compilation unit already executed? then return to caller: */
if(toplevel_initialized) C_kontinue(t1,C_SCHEME_UNDEFINED);
/* else note entry (this will output the start of executing the compilation
   unit toplevel when debug mode is enabled with the "-:d" runtime option): */
else C_toplevel_entry(C_text("toplevel"));
/* resize nursery (first heap generation) to value given to compiler (here
   it is the default): */
C_resize_stack(131072);
/* check whether the nursery has generally least enough space for all literals
   we create in this unit: */
C_check_nursery_minimum(3);
/* Is the current level (as opposed to the total capacity) of the nursery ok: */
if(!C_demand(3)){
  /* no - save temporaries and invoke a minor (nursery) garbage collection,
     passing the proper trampoline to re-enter the function: */
C_save(t1);
C_reclaim((void*)toplevel_trampoline,NULL);}
/* otherwise mark as initialized: */
toplevel_initialized=1;
/* check whether the second-generation heap is big enough for all literals
   defined here: */
if(!C_demand_2(30)){
  /* no - invoke major GC with minimum space required: */
C_save(t1);
C_rereclaim2(30*sizeof(C_word), 1);
/* restore temporaries (pop from temporary stack): */
t1=C_restore;}
/* allocate storage for the data we create in the nursery (on the
stack). This is only the closure, the rest is already created in the
second generation (in the heap), since it will live forever anyway: */
a=C_alloc(3);
/* initialize a literal frame record: */
C_initialize_lf(lf,8);
/* intern symbols that we are going to us as toplevel variables: */
lf[0]=C_h_intern(&lf[0],7,"nqueens");
lf[1]=C_h_intern(&lf[1],6,"append");
/* these "lambda-info" strings are used to show more meaningful output
   when printing a procedure: */
lf[2]=C_static_lambda_info(C_heaptop,16,"(try x9 y10 z11)");
lf[3]=C_static_lambda_info(C_heaptop,27,"(ok\077 row12 dist13 placed14)");
lf[4]=C_static_lambda_info(C_heaptop,12,"(loop i6 l7)");
lf[5]=C_static_lambda_info(C_heaptop,12,"(nqueens n0)");
lf[6]=C_h_intern(&lf[6],25,"\003sysimplicit-exit-handler");
lf[7]=C_static_lambda_info(C_heaptop,10,"(toplevel)");
/* register literal frame globally to be traversed on every major GC-.
This also creates the "procedure table" for serialization (if enabled): */
C_register_lf2(lf,8,create_ptable());
/* allocate our first closure record - there are going to be more of those...
   (note the store of temporary "t1", which is the continuation of the call
   to this toplevel procedure). "f_29" is "k27" below: */
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_29,a[2]=t1,tmp=(C_word)a,a+=3,tmp);
/* Invoke the "library" unit (done by default by compiled code, unless 
   the "-explicit-use" option is given): */
C_library_toplevel(2,C_SCHEME_UNDEFINED,t2);}

Next the code for user procedures and continuation closures introduced by the CPS conversion:

/* k27 */
static void C_ccall f_29(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word ab[3],*a=ab;
/* a continuation closure - it checks for interrupts (signals or
   timer-interrupts) first. If an interrupt occurred, the stack-limit
   is temporarily set to the maximum to trigger a GC (which will discover
   that we actually have an interrupt situation and handle accordingly): */
C_check_for_interrupt;
/* check stack level (we don't allocate, but check): */
if(!C_stack_probe(&a)){
  /* nursery exhausted - GC: */
C_save_and_reclaim((void*)tr2,(void*)f_29,2,t0,t1);}
/* allocate continuation and invoke "eval" unit, another default library: */
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_32,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
C_eval_toplevel(2,C_SCHEME_UNDEFINED,t2);}

/* k30 in k27 */
static void C_ccall f_32(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word ab[3],*a=ab;
/* nothing new, call "extras" unit: */
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_32,2,t0,t1);}
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_35,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
C_extras_toplevel(2,C_SCHEME_UNDEFINED,t2);}

/* k33 in k30 in k27 */
static void C_ccall f_35(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[6],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_35,2,t0,t1);}
/* an assignment! we set the "nqueens" variable to a procedure value, which we 
   allocate right away. We actually mutate slot #0 (offset 1) of the symbol
   "nqueens": */
t2=C_mutate((C_word*)lf[0]+1,(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_37,a[2]=lf[5],
                              tmp=(C_word)a,a+=3,tmp));
/* allocate yet another continuation closure: */
t3=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_194,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
/* write string into trace-buffer to give us a call-chain on errors: */
C_trace("nqueens.scm: 28   nqueens");
/* retrieve the value and call "nqueens" with the argument "8": */
t4=*((C_word*)lf[0]+1);
((C_proc3)C_retrieve_proc(t4))(3,t4,t3,C_fix(8));}

/* k192 in k33 in k30 in k27 */
static void C_ccall f_194(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[6],*a=ab;
/* this the continuation of the "(nqueens 8)" call - it invokes
   the "##sys#implicit-exit-handler", which is responsible for
   cleaning up, when a program just "falls off the end": */
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_194,2,t0,t1);}
/* 2 continuations? Indeed as we call the result of calling the
   handler: */
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_197,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
t3=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_200,a[2]=t2,tmp=(C_word)a,a+=3,tmp);
C_trace("##sys#implicit-exit-handler");
t4=C_retrieve(lf[6]);
((C_proc2)C_retrieve_proc(t4))(2,t4,t3);}

/* k198 in k192 in k33 in k30 in k27 */
static void C_ccall f_200(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word *a;
/* a "trivial" continuation - we omit checking stack or interrupts here: */
t2=t1;
((C_proc2)C_retrieve_proc(t2))(2,t2,((C_word*)t0)[2]);}

/* k195 in k192 in k33 in k30 in k27 */
static void C_ccall f_197(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word *a;
t2=((C_word*)t0)[2];
((C_proc2)(void*)(*((C_word*)t2+1)))(2,t2,C_SCHEME_UNDEFINED);}

/* nqueens in k33 in k30 in k27 */
static void C_ccall f_37(C_word c,C_word t0,C_word t1,C_word t2){
C_word tmp;
C_word t3;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word t10;
C_word t11;
C_word t12;
C_word t13;
C_word ab[23],*a=ab;
/* here we go. First, check argument count: */
if(c!=3) C_bad_argc_2(c,3,t0);
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr3,(void*)f_37,3,t0,t1,t2);}
/* here we create and fill "boxes": mutable cells that hold "letrec" bound
   lexical variables: */
t3=C_SCHEME_UNDEFINED;
t4=(*a=C_VECTOR_TYPE|1,a[1]=t3,tmp=(C_word)a,a+=2,tmp);
t5=C_SCHEME_UNDEFINED;
t6=(*a=C_VECTOR_TYPE|1,a[1]=t5,tmp=(C_word)a,a+=2,tmp);
t7=C_set_block_item(t4,0,(*a=C_CLOSURE_TYPE|4,a[1]=(C_word)f_66,a[2]=t6,a[3]=t4,
                          a[4]=lf[2],tmp=(C_word)a,a+=5,tmp));
t8=C_set_block_item(t6,0,(*a=C_CLOSURE_TYPE|3,a[1]=(C_word)f_130,a[2]=t6,a[3]=lf[3],
                          tmp=(C_word)a,a+=4,tmp));
t9=(*a=C_CLOSURE_TYPE|3,a[1]=(C_word)f_191,a[2]=t1,a[3]=t4,tmp=(C_word)a,a+=4,tmp);
t10=C_SCHEME_UNDEFINED;
t11=(*a=C_VECTOR_TYPE|1,a[1]=t10,tmp=(C_word)a,a+=2,tmp);
t12=C_set_block_item(t11,0,(*a=C_CLOSURE_TYPE|3,a[1]=(C_word)f_46,a[2]=t11,a[3]=lf[4],
                            tmp=(C_word)a,a+=4,tmp));
t13=((C_word*)t11)[1];
/* invoke the loop - since this procedure is "known", we can call
   it directly: */
f_46(t13,t9,t2,C_SCHEME_END_OF_LIST);}

/* loop in nqueens in k33 in k30 in k27 */
static void C_fcall f_46(C_word t0,C_word t1,C_word t2,C_word t3){
C_word tmp;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word *a;
/* the loop could be optimized, since enough information was available
   to the compiler: */
loop:
a=C_alloc(7);
C_check_for_interrupt;
if(!C_stack_probe(a)){
C_save_and_reclaim((void*)trf_46,NULL,4,t0,t1,t2,t3);}
/* a conditional expression, translated to C: */
if(C_truep((C_word)C_i_nequalp(t2,C_fix(0)))){
t4=t1;
((C_proc2)(void*)(*((C_word*)t4+1)))(2,t4,t3);}
else{
  /* primitive operations: */
t4=(C_word)C_a_i_minus(&a,2,t2,C_fix(1));
t5=(C_word)C_a_i_cons(&a,2,t2,t3);
C_trace("nqueens.scm: 7    loop");
/* a self-recursive tail call, just shuffle arguments around
   and jump: */
t7=t1;
t8=t4;
t9=t5;
t1=t7;
t2=t8;
t3=t9;
goto loop;}}

/* k189 in nqueens in k33 in k30 in k27 */
static void C_ccall f_191(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word *a;
C_trace("nqueens.scm: 26   try");
/* we refer to a lexical variable nested in the closure record
   "t0", and boxed (so an indirect reference): */
t2=((C_word*)((C_word*)t0)[3])[1];
f_66(t2,((C_word*)t0)[2],t1,C_SCHEME_END_OF_LIST,C_SCHEME_END_OF_LIST);}

/* ok? in nqueens in k33 in k30 in k27 */
static void C_fcall f_130(C_word t0,C_word t1,C_word t2,C_word t3,C_word t4){
C_word tmp;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word t10;
C_word t11;
C_word t12;
C_word t13;
C_word t14;
C_word t15;
C_word *a;
/* this should be familiar by now: */
loop:
a=C_alloc(12);
C_check_for_interrupt;
if(!C_stack_probe(a)){
C_save_and_reclaim((void*)trf_130,NULL,5,t0,t1,t2,t3,t4);}
if(C_truep((C_word)C_i_nullp(t4))){
t5=t1;
/* an ugly way to call the function pointer in the continuation
   closure record: */
((C_proc2)(void*)(*((C_word*)t5+1)))(2,t5,C_SCHEME_TRUE);}
else{
  /* primitives and more conditionals - see the source code above
     to correlate the translation to the s-expressions: */
t5=(C_word)C_i_car(t4);
t6=(C_word)C_a_i_plus(&a,2,t2,t3);
if(C_truep((C_word)C_i_nequalp(t5,t6))){
t7=t1;
((C_proc2)(void*)(*((C_word*)t7+1)))(2,t7,C_SCHEME_FALSE);}
else{
t7=(C_word)C_i_car(t4);
t8=(C_word)C_a_i_minus(&a,2,t2,t3);
if(C_truep((C_word)C_i_nequalp(t7,t8))){
t9=t1;
((C_proc2)(void*)(*((C_word*)t9+1)))(2,t9,C_SCHEME_FALSE);}
else{
t9=(C_word)C_a_i_plus(&a,2,t3,C_fix(1));
t10=(C_word)C_i_cdr(t4);
C_trace("nqueens.scm: 24   ok?");
t12=t1;
t13=t2;
t14=t9;
t15=t10;
t1=t12;
t2=t13;
t3=t14;
t4=t15;
goto loop;}}}}

It goes on like this, we just show it for completeness:

/* try in nqueens in k33 in k30 in k27 */
static void C_fcall f_66(C_word t0,C_word t1,C_word t2,C_word t3,C_word t4){
C_word tmp;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word ab[14],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)trf_66,NULL,5,t0,t1,t2,t3,t4);}
if(C_truep((C_word)C_i_nullp(t2))){
t5=(C_word)C_i_nullp(t3);
t6=t1;
((C_proc2)(void*)(*((C_word*)t6+1)))(2,t6,(C_truep(t5)?C_fix(1):C_fix(0)));}
else{
t5=(*a=C_CLOSURE_TYPE|6,a[1]=(C_word)f_86,a[2]=t4,a[3]=((C_word*)t0)[3],
    a[4]=t3,a[5]=t2,a[6]=t1,tmp=(C_word)a,a+=7,tmp);
t6=(*a=C_CLOSURE_TYPE|6,a[1]=(C_word)f_105,a[2]=t3,a[3]=t5,
    a[4]=((C_word*)t0)[3],a[5]=t4,a[6]=t2,tmp=(C_word)a,a+=7,tmp);
t7=(C_word)C_i_car(t2);
C_trace("nqueens.scm: 14   ok?");
t8=((C_word*)((C_word*)t0)[2])[1];
f_130(t8,t6,t7,C_fix(1),t4);}}

/* k103 in try in nqueens in k33 in k30 in k27 */
static void C_ccall f_105(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[6],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_105,2,t0,t1);}
if(C_truep(t1)){
t2=(*a=C_CLOSURE_TYPE|5,a[1]=(C_word)f_112,a[2]=((C_word*)t0)[3],a[3]=((C_word*)t0)[4],
    a[4]=((C_word*)t0)[5],a[5]=((C_word*)t0)[6],tmp=(C_word)a,a+=6,tmp);
t3=(C_word)C_i_cdr(((C_word*)t0)[6]);
C_trace("nqueens.scm: 15   append");
t4=*((C_word*)lf[1]+1);
((C_proc4)C_retrieve_proc(t4))(4,t4,t2,t3,((C_word*)t0)[2]);}
else{
t2=((C_word*)t0)[3];
f_86(2,t2,C_fix(0));}}

/* k110 in k103 in try in nqueens in k33 in k30 in k27 */
static void C_ccall f_112(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[3],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_112,2,t0,t1);}
t2=(C_word)C_i_car(((C_word*)t0)[5]);
t3=(C_word)C_a_i_cons(&a,2,t2,((C_word*)t0)[4]);
C_trace("nqueens.scm: 15   try");
t4=((C_word*)((C_word*)t0)[3])[1];
f_66(t4,((C_word*)t0)[2],t1,C_SCHEME_END_OF_LIST,t3);}

/* k84 in try in nqueens in k33 in k30 in k27 */
static void C_ccall f_86(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word t5;
C_word t6;
C_word ab[7],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_86,2,t0,t1);}
t2=(*a=C_CLOSURE_TYPE|3,a[1]=(C_word)f_90,a[2]=t1,a[3]=((C_word*)t0)[6],tmp=(C_word)a,a+=4,tmp);
t3=(C_word)C_i_cdr(((C_word*)t0)[5]);
t4=(C_word)C_i_car(((C_word*)t0)[5]);
t5=(C_word)C_a_i_cons(&a,2,t4,((C_word*)t0)[4]);
C_trace("nqueens.scm: 17   try");
t6=((C_word*)((C_word*)t0)[3])[1];
f_66(t6,t2,t3,t5,((C_word*)t0)[2]);}

/* k88 in k84 in try in nqueens in k33 in k30 in k27 */
static void C_ccall f_90(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word ab[4],*a=ab;
C_check_for_interrupt;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_90,2,t0,t1);}
t2=((C_word*)t0)[3];
((C_proc2)(void*)(*((C_word*)t2+1)))(2,t2,(C_word)C_a_i_plus(&a,2,((C_word*)t0)[2],t1));}

Finally, the procedure table:

#ifdef C_ENABLE_PTABLES
static C_PTABLE_ENTRY ptable[17] = {
{"toplevelnqueens.scm",(void*)C_toplevel},
{"f_29nqueens.scm",(void*)f_29},
{"f_32nqueens.scm",(void*)f_32},
{"f_35nqueens.scm",(void*)f_35},
{"f_194nqueens.scm",(void*)f_194},
{"f_200nqueens.scm",(void*)f_200},
{"f_197nqueens.scm",(void*)f_197},
{"f_37nqueens.scm",(void*)f_37},
{"f_46nqueens.scm",(void*)f_46},
{"f_191nqueens.scm",(void*)f_191},
{"f_130nqueens.scm",(void*)f_130},
{"f_66nqueens.scm",(void*)f_66},
{"f_105nqueens.scm",(void*)f_105},
{"f_112nqueens.scm",(void*)f_112},
{"f_86nqueens.scm",(void*)f_86},
{"f_90nqueens.scm",(void*)f_90},
{NULL,NULL}};
#endif

static C_PTABLE_ENTRY *create_ptable(void){
#ifdef C_ENABLE_PTABLES
return ptable;
#else
return NULL;
#endif
}

Phew. Done.

/* end of file */

Another example

No, you are not quite through yet. Let's look at a simpler example, with full optimizations turned on and with safety checks disabled. The example program is the takl benchmark from the Gabriel benchmark suite[2] (translated to Scheme by Will Clinger):

(define (listn n)
  (if (= 0 n)
      '()
      (cons n (listn (- n 1)))) )
 
(define 18l (listn 18))
(define 12l (listn 12))
(define  6l (listn 6))
 
(define (mas x y z)
  (if (not (shorterp y x))
      z
      (mas (mas (cdr x)
		y z)
	   (mas (cdr y)
		z x)
	   (mas (cdr z)
		x y))))
 
(define (shorterp x y)
  (and (pair? y)
       (or (null? x)
	   (shorterp (cdr x)
		     (cdr y)))) )
 
(mas 18l 12l 6l)

After canonicalization:

(##core#callunit "library")
(##core#callunit "eval")
(##core#callunit "extras")
(##core#undefined)
(##core#undefined)
(set! listn (lambda (n0) (if (= '0 n0) '() (cons n0 (listn (- n0 '1))))))
(set! 18l (listn '18))
(set! 12l (listn '12))
(set! 6l (listn '6))
(set! mas
  (lambda (x1 y2 z3)
    (if (not (shorterp y2 x1))
      z3
      (mas (mas (cdr x1) y2 z3) (mas (cdr y2) z3 x1) (mas (cdr z3) x1 y2)))))
(set! shorterp
  (lambda (x4 y5)
    (if (pair? y5)
      (let ((g67 (null? x4))) (if g67 g67 (shorterp (cdr x4) (cdr y5))))
      '#f)))
(mas 18l 12l 6l)
((##sys#implicit-exit-handler))
(##core#undefined)

The generated C code looks like this (we have omitted less interesting parts):

/* k22 */
static void C_ccall f_24(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word ab[3],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_24,2,t0,t1);}
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_27,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
C_eval_toplevel(2,C_SCHEME_UNDEFINED,t2);}

/* k25 in k22 */
static void C_ccall f_27(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word ab[3],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_27,2,t0,t1);}
t2=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_30,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
C_extras_toplevel(2,C_SCHEME_UNDEFINED,t2);}

/* k28 in k25 in k22 */
static void C_ccall f_30(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[5],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_30,2,t0,t1);}
/* we use the literal frame slot directly for "listn": since this is compiled in block mode,
   we don't use normal symbols as toplevel variables, as they are not to
   be accessed from outside: */
t2=C_mutate(&lf[0],(*a=C_CLOSURE_TYPE|1,a[1]=(C_word)f_32,tmp=(C_word)a,a+=2,tmp));
t3=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_54,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
/* takl.scm: 9    listn */
f_32(t3,C_fix(18));}

/* k52 in k28 in k25 in k22 */
static void C_ccall f_54(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[3],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_54,2,t0,t1);}
/* here we store the result in "18l": */
t2=C_mutate(&lf[1],t1);
t3=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_58,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
/* takl.scm: 10   listn */
f_32(t3,C_fix(12));}

/* k56 in k52 in k28 in k25 in k22 */
static void C_ccall f_58(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word ab[3],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_58,2,t0,t1);}
/* ... "12l" ... */
t2=C_mutate(&lf[2],t1);
t3=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_62,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
/* takl.scm: 11   listn */
f_32(t3,C_fix(6));}

/* k60 in k56 in k52 in k28 in k25 in k22 */
static void C_ccall f_62(C_word c,C_word t0,C_word t1){
C_word tmp;
C_word t2;
C_word t3;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word ab[10],*a=ab;
if(!C_stack_probe(&a)){
C_save_and_reclaim((void*)tr2,(void*)f_62,2,t0,t1);}
/* ... and "6l": */
t2=C_mutate(&lf[3],t1);
/* set "mas" and "shorterp": */
t3=C_mutate(&lf[4],(*a=C_CLOSURE_TYPE|1,a[1]=(C_word)f_64,tmp=(C_word)a,a+=2,tmp));
t4=C_mutate(&lf[5],(*a=C_CLOSURE_TYPE|1,a[1]=(C_word)f_104,tmp=(C_word)a,a+=2,tmp));
/* and call "mas", directly: */
t5=f_64(lf[1],lf[2],lf[3]);
t6=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_134,a[2]=((C_word*)t0)[2],tmp=(C_word)a,a+=3,tmp);
t7=(*a=C_CLOSURE_TYPE|2,a[1]=(C_word)f_137,a[2]=t6,tmp=(C_word)a,a+=3,tmp);
/* ##sys#implicit-exit-handler */
t8=*((C_word*)lf[6]+1);
((C_proc2)(void*)(*((C_word*)t8+1)))(2,t8,t7);}

/* shorterp in k60 in k56 in k52 in k28 in k25 in k22 */
static C_word C_fcall f_104(C_word t1,C_word t2){
C_word tmp;
C_word t3;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
/* tight code: */
loop:
if(C_truep((C_word)C_i_pairp(t2))){
t3=(C_word)C_i_nullp(t1);
if(C_truep(t3)){
return(t3);}
else{
t4=(C_word)C_slot(t1,C_fix(1));
t5=(C_word)C_slot(t2,C_fix(1));
t7=t4;
t8=t5;
t1=t7;
t2=t8;
goto loop;}}
else{
return(C_SCHEME_FALSE);}}

/* mas in k60 in k56 in k52 in k28 in k25 in k22 */
static C_word C_fcall f_64(C_word t1,C_word t2,C_word t3){
C_word tmp;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word t10;
C_word t11;
C_word t12;
C_word t13;
C_word t14;
/* more tight code: */
loop:
t4=f_104(t2,t1);
if(C_truep(t4)){
t5=(C_word)C_slot(t1,C_fix(1));
t6=f_64(t5,t2,t3);
t7=(C_word)C_slot(t2,C_fix(1));
t8=f_64(t7,t3,t1);
t9=(C_word)C_slot(t3,C_fix(1));
t10=f_64(t9,t1,t2);
t12=t6;
t13=t8;
t14=t10;
t1=t12;
t2=t13;
t3=t14;
goto loop;}
else{
return(t3);}}

/* listn in k28 in k25 in k22 */
static void C_fcall f_32(C_word t1,C_word t2){
C_word tmp;
C_word t3;
C_word t4;
C_word t5;
C_word t6;
C_word t7;
C_word t8;
C_word t9;
C_word *a;
/* "listn" allocates, so not quite as tight: */
loop:
a=C_alloc(4);
if(!C_stack_probe(a)){
C_save_and_reclaim((void*)trf_32,NULL,2,t1,t2);}
t3=t2;
t4=(C_word)C_eqp(C_fix(0),t3);
if(C_truep(t4)){
t5=t1;
((C_proc2)(void*)(*((C_word*)t5+1)))(2,t5,C_SCHEME_END_OF_LIST);}
else{
t5=(*a=C_CLOSURE_TYPE|3,a[1]=(C_word)f_46,a[2]=t2,a[3]=t1,tmp=(C_word)a,a+=4,tmp);
t6=(C_word)C_u_fixnum_difference(t2,C_fix(1));
/* takl.scm: 7    listn */
t8=t5;
t9=t6;
t1=t8;
t2=t9;
goto loop;}}

References

[1] "Cheney on the MTA or CONS should not cons its arguments", Henry Baker

[2] "Performance and Evaluation of Lisp Systems", Richard Gabriel