\(\gdef\eqq{\stackrel{?}{=}}\)

Bootstrapping a verified compiler for
an imperative language in Rocq

Kacper F. Korban, SYSTEMF, EPFL

Supervised by Magnus O. Myreen, Chalmers University of Technology, University of Gothenburg and Clément Pit-Claudel, EPFL

QR code

Why bootstrap a verified compiler?

Bootstrapping
a verified compiler
-> No extraction
Bootstrapping noun /ˈbuːtˌstræpɪŋ/
  1. the process of creating a compiler executable in which the compiler compiles its own source code
Self-hosting adjective /ˌself.ˈhəʊstɪŋ/
  1. of a language whose compiler is implemented in itself

Verified compiler setup

IMPL language

\[ \begin{alignat*}{3} e\ {:}{:}{=} &\;\; x \;|\;\; n \;|\;\; e_1\; +\; e_2 \\ &|\;\; e_1\; -\; e_2 \;|\;\; e_1\; \backslash\; e_2 \;|\;\; e_1[e_2] \\ cmp\ {:}{:}{=} &\;\; {<} \;|\;\; {=} \\ t\ {:}{:}{=} &\;\; e_1\; cmp\; e_2 \;|\;\; \mathsf{not}\; t \\ &|\;\; t_1\; \wedge\; t_2 \;|\;\; t_1\; \lor\; t_2 \\ c\ {:}{:}{=} &\;\; \varepsilon \;|\;\; c_1\; ;\; c_2 \;|\;\; x\; {=}\; e \\ &|\;\; e_1[e_2]\; {=}\; e_3 \;|\;\; \mathbf{if}\; t\; \mathbf{then}\; c_1\; \mathbf{else}\; c_2 \\ &|\;\; \mathbf{while}\; t\; c \;|\;\; x\;{=}\; f(e_1,...,e_k) \\ &|\;\; \mathbf{return}\; e \;|\;\; x\; {=}\; \mathsf{alloc}(e) \;|\;\; \mathsf{abort} \\ &|\;\; \mathsf{putchar}\; e \;|\;\; \mathsf{getchar}\; x \end{alignat*} \]

ASM langage

\[ \begin{alignat*}{3} c\ {:}{:}{=}\;&\;\; \mathsf{always} \;|\;\; \mathsf{less}\; r_1\; r_2 \;|\;\; \mathsf{equal}\; r_1\; r_2 \\[2mm] i\ {:}{:}{=}\;&\;\; \mathsf{const}\; r\; w \;|\;\; \mathsf{add}\; r_1\; r_2 \;|\;\; \mathsf{sub}\; r_1\; r_2 \\ &|\;\; \mathsf{div}\; r \;|\;\; \mathsf{jump}\; c\; n \;|\;\; \mathsf{call}\; n \\ &|\;\; \mathsf{mov}\; r_1\; r_2 \;|\;\; \mathsf{ret} \;|\;\; \mathsf{pop}\; r \\ &|\;\; \mathsf{push}\; r \;|\;\; \mathsf{add\_rsp}\; n \\ &|\;\; \mathsf{sub\_rsp}\; n \;|\;\; \mathsf{load\_rsp}\; r\; n \\ &|\;\; \mathsf{store\_rsp}\; r\; n \;|\;\; \mathsf{load}\; r_1\; r_2\; o \\ &|\;\; \mathsf{store}\; r_1\; r_2\; o \;|\;\; \mathsf{getchar} \;|\;\; \mathsf{putchar} \\ &|\;\; \mathsf{exit} \;|\;\; \mathsf{comment}\; s \end{alignat*} \]

impl_to_asm_Rocq

...
Definition c_add: asm_appl :=
  let/d pop_rdi :=
    ASMSyntax.Pop RDI in
  let/d add_rax_rdi :=
    ASMSyntax.Add RAX RDI in
  let/d asm1 :=
    [pop_rdi; add_rax_rdi] in
  let/d res := List asm1 in
  res.
...
Definition impl_to_asm_Rocq
  (prog: IMPL): ASM := ...
We bootstrapped a verified compiler
for an imperative language
in Rocq

Contributions:

  • First standalone bootstrapping of a verified compiler in Rocq
  • First standalone bootstrapping of a verified compiler for an imperative language
  • New variant of relational compilation for proof-producing reification of a subset of Gallina

Bootstrap a verified compiler in 3 steps!

(* Reify the compiler code to FP using a tactic *)
let impl_to_asm_FP: FP := ltac2:(reify '@impl_to_asm_Rocq) in
(* Evaluate fp_to_impl_Rocq code generator in-logic on *)
(*  the reified deeply embedded compiler in FP *)
let impl_to_asm_IMPL: IMPL := fp_to_impl_Rocq impl_to_asm_FP in
(* Evaluate the compiler's own code generator in-logic *)
(*  on the reified deeply embedded compiler in IMPL *)
let impl_to_asm_ASM: ASM := impl_to_asm_Rocq impl_to_asm_IMPL in
(* Prettify the resulting ASM code of the compiler *)
pretty_print impl_to_asm_ASM
A diagram visualizing our bootstrapping process

1. Reification (Gallina -> FP)

  1. Define a target language
    \[ \begin{alignat*}{3} o\ {:}{:}{=}\;&\;\; + \;|\;\; - \;|\;\; \times \;|\;\; / \;|\;\; \mathsf{cons} \\ &\;|\;\; \mathsf{head} \;|\;\; \mathsf{tail} \;|\;\; \mathsf{read} \;|\;\; \mathsf{write} \\ t\ {:}{:}{=}\;&\;\; < \;|\;\; = \\ e\ {:}{:}{=}\;&\;\; n \;|\;\; x \;|\;\; o(e_1,\dots,e_k) \;|\;\; f(e_1,\dots,e_k) \\ &|\;\; \mathbf{if}\; e_1\; t\; e_2\; \mathbf{then}\; e_3\; \mathbf{else}\; e_4 \;|\;\; \mathbf{let}\; x\;=\; e_1\; \mathbf{in}\; e_2 \end{alignat*} \]
  2. State shallow to deep reification (compilation) as a proof search problem,
    \[ \begin{aligned} & \exists \; (\mathit{impl\_to\_asm\_FP}: \mathsf{FP}). \\ & \quad \forall \; (\mathit{inp}: \mathsf{string}). \\ & \quad\quad (\mathit{impl\_to\_asm\_FP}, \; \mathit{inp}) \\ & \quad\quad\quad \Downarrow_{\textsf{FP}} \mathsf{impl\_to\_asm\_Rocq} \; \mathit{inp} \end{aligned} \]
  3. Define compilation lemmas e.g.,
    \[ \begin{aligned} & \textit{x1} \Downarrow_{\textsf{FP}} \textit{n1} \; \rightarrow \; \textit{x2} \Downarrow_{\textsf{FP}} \textit{n2} \; \rightarrow \\ & \texttt{(Op Add [}\textit{x1}\texttt{;} \textit{x2}\texttt{])} \; \Downarrow_{\textsf{FP}} \; \textit{n1} \; \textsf{+} \; \textit{n2} \end{aligned} \]
  4. Write a compiler in Ltac2 for this decision procedure
    repeat (match! goal with
      | ... =>
        eapply compilation_lemmaX
      ...
    end).
    
Defun (name_enc "c_add") []
  (Let (name_enc "pop_rdi")
    (Op Cons
      [FunSyntax.Const (name_enc "Pop");
       Op Cons [FunSyntax.Const (name_enc "RDI"); FunSyntax.Const 0]])
    (Let (name_enc "add_rax_rdi")
      (Op Cons
        [FunSyntax.Const (name_enc "Add");
         Op Cons
          [FunSyntax.Const (name_enc "RAX");
           Op Cons [FunSyntax.Const (name_enc "RDI"); FunSyntax.Const 0]]])
      (Let (name_enc "asm1")
        (Op Cons
          [FunSyntax.Var (name_enc "pop_rdi");
           Op Cons [FunSyntax.Var (name_enc "add_rax_rdi"); FunSyntax.Const 0]])
        (Let (name_enc "res")
          (Op Cons
            [FunSyntax.Const (name_enc "List");
             Op Cons [FunSyntax.Var (name_enc "asm1"); FunSyntax.Const 0]])
          (FunSyntax.Var (name_enc "res"))))))

2. Lowering to IMPL (FP -> IMPL)

  1. Define a code generator from FP to IMPL
    Definition fp_to_impl_Rocq
      (p: FP): option IMPL := ...
    
  2. Verify it
    \[ \begin{align*} & \forall \; (\textit{fp}: \textsf{FP}), (\textit{inp}: \textsf{string}), (\textit{o}_1 \textit{o}_2: \textsf{string}). \\ & \quad (\textit{fp}, \; \textit{inp}) \Downarrow_{\textsf{FP}} \textit{o}_1 \; \rightarrow \\ & \quad (\textsf{fp\_to\_impl\_Rocq} \; \textit{fp}, \; \textit{inp}) \Downarrow_{\textsf{IMPL}} \textit{o}_2 \; \rightarrow \\ & \quad\quad \textit{o}_2 \approx \textit{o}_1 \end{align*} \]
Func 426801980516 []
  (Seq
    (Call 31647725819946089 124416
      [Const 5271408; Const 5391433; Const 0])
  (Seq
    (Call 117739892702980115996501097 124672
      [Const 4285540; Const 5390680; Const 5391433; Const 0])
  (Seq
    (Call 1634954545 124416
      [Var 31647725819946089; Var 117739892702980115996501097; Const 0])
  (Seq
    (Call 7497075 124416
      [Const 1281979252; Var 1634954545; Const 0])
  (Return (Var 7497075))))))

4. Double-compiling (IMPL -> ASM)

  1. Compile impl_to_asm with its assembly
    \[ \begin{aligned} & (\texttt{impl\_to\_asm\_ASM}, \; \texttt{impl\_to\_asm\_IMPL}) \\ & \quad \Downarrow_{\textsf{ASM}} \; \texttt{impl\_to\_asm\_ASM}_{DC} \end{aligned} \]
  2. Check that bootstrapping fixpoints
    \[ \begin{align*} & \texttt{impl\_to\_asm\_ASM}_{DC} = \texttt{impl\_to\_asm\_ASM} \end{align*} \]
[ASMSyntax.Sub_RSP 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5271408;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5391433;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 4;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 4285540;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5390680;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5391433;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Pop ASMSyntax.RBX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 3;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 2;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 1281979252;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 3;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 1;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 1;
 ASMSyntax.Add_RSP 5;
 ASMSyntax.Ret]

3. Bootstrapping (IMPL -> ASM)

  1. Compile impl_to_asm with itself
    \[ \begin{aligned} & \mathsf{impl\_to\_asm\_ASM} \triangleq \\ & \;\; \textsf{impl\_to\_asm\_Rocq} \; \texttt{impl\_to\_asm\_IMPL} \end{aligned} \]
  2. Apply impl_to_asm_Rocq correctness
    \[ \begin{align*} & \forall \; (\textit{inp}: \textsf{string}), (\textit{o}: \textsf{string}). \\ & \quad (\mathsf{impl\_to\_asm\_ASM}, \; \textit{inp}) \Downarrow_{\textsf{ASM}} \textit{o} \rightarrow \\ & \quad\quad \textit{o} \approx \textsf{impl\_to\_asm\_Rocq} \; \textit{inp} \end{align*} \]
[ASMSyntax.Sub_RSP 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5271408;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5391433;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 4;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 4285540;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5390680;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 5391433;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Pop ASMSyntax.RBX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 3;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 4;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 2;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 1281979252;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 3;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Const ASMSyntax.RAX 0;
 ASMSyntax.Pop ASMSyntax.RDI;
 ASMSyntax.Pop ASMSyntax.RDX;
 ASMSyntax.Call 0;
 ASMSyntax.Store_RSP ASMSyntax.RAX 1;
 ASMSyntax.Pop ASMSyntax.RAX;
 ASMSyntax.Push ASMSyntax.RAX;
 ASMSyntax.Load_RSP ASMSyntax.RAX 1;
 ASMSyntax.Add_RSP 5;
 ASMSyntax.Ret]

Ask me about:

  • Why don't we need to verify the parser?
  • Why is fp_to_impl_Rocq easy to write and verify?
  • How we reify non-structural recursion?
  • How does this relate to connected efforts in HOL4?
  • How we made HOL4-style proofs work in Rocq?
Related work
(Curzon, 1993) (Myreen & Owens, 2014) (Kumar et al., 2014) (Mullen et al., 2018) (Myreen, 2021) (Abrahamsson et al., 2020) (Pit-Claudel et al., 2022) (Forster et al., 2024)