(*|
====================
Week 2: Polymorphism
====================

Author
  `Adam Chlipala <http://adam.chlipala.net/frap/>`__, with modifications by CS-428 staff.
License
  No redistribution allowed (usage by permission in CS-428).

.. contents::

.. coq:: none
|*)

From Coq Require Import Strings.String Lia.
Open Scope string_scope.

(*|
One of the recurring ingredients in effective machine formalization is data
structures, and, as in general programming, we want to be able to define data
structures that are *generic* in the kinds of data they contain.  Coq
supports such things in roughly the same way as Haskell and OCaml, via
*parametric polymorphism*, which is closely related to the idea of *generics*
in languages like Java.

Options
=======

Our first example: the `option` type family.  While Java and friends force
all sorts of different types to include the special value `null`, in Coq we
request that option explicitly by wrapping a type in `option`.  Specifically,
any value of type `option A`, for some type `A`, is either `None` (sort of
like `null`) or `Some v` for a `v` of type `A`.
|*)

Inductive option {A : Set} : Set :=
| None
| Some (v : A).

Arguments option : clear implicits.

(*|
Here are a few example terms using `option`.
|*)

Example no_number : option nat := None.
Example a_number : option nat := Some 42.
Example no_number_squared : option (option nat) := None.
Example no_number_squared_inside : option (option nat) := Some None.
Example a_number_squared : option (option nat) := Some (Some 42).

(*|
Pattern matching is the key ingredient for working with inductive definitions
of all sorts.  Here are some examples matching on `option`\s.
|*)

Definition increment_optional (no : option nat) : option nat :=
  match no with
  | None => None
  | Some n => Some (n + 1)
  end.

(*|
Here we use type `A * B` of *pairs*, inhabited by values `(a, b)`, with
`a : A` and `b : B`.
|*)

Definition add_optional (po : option (nat * nat)) : option nat :=
  match po with
  | None => None
  | Some (n, m) => Some (n + m)
  end.

(*|
Lists
=====

For functional programming (as in Coq), the king of all generic data
structures is the *list*.
|*)

Inductive list {A : Set} : Set :=
| nil
| cons (hd : A) (tl : @list A).

Arguments list: clear implicits.

(*|
`nil` is the empty list, while `cons`, standing for "construct," extends a
list of length `n` into one of length `n+1`.

Here are some simple lists.
|*)

Example nats0 : list nat := nil.
Example nats1 : list nat := cons 1 nil.
Example nats2 : list nat := cons 1 (cons 2 nil).

(*|
Coq features a wonderful notation system, to help us write more concise and
readable code after introducing new syntactic forms.  We will not give a
systematic presentation of the notation system, but we will show many
examples, from which it is possible to infer generality by scientific
induction.  And, of course, the interested reader can always check the
notations chapter of the Coq reference manual.

First, our examples can get more readable with an infix operator for `cons`.
|*)

Infix "::" := cons.

Example nats1' : list nat := 1 :: nil.
Example nats2' : list nat := 1 :: 2 :: nil.

(*|
Getting even more fancy, we declare a notation for list literals.
|*)

Notation "[ ]" := nil.
Notation "[ x1 ; .. ; xN ]" := (cons x1 (.. (cons xN nil) ..)).

Example nats0'' : list nat := [].
Example nats1'' : list nat := [1].
Example nats2'' : list nat := [1; 2].
Example nats3'' : list nat := [1; 2; 3].

(*|
Here are some classic recursive functions that operate over lists.
First, here is how to compute the length of a list.  We put *implicit*
function arguments in curly braces, asking Coq to infer them at call
sites.
|*)

Fixpoint length {A} (ls : list A) : nat :=
  match ls with
  | nil => 0
  | _ :: ls' => 1 + length ls'
  end.

(*|
Here are two classic recursive functions on lists.

- First,
  concatenation:
|*)

Fixpoint app {A} (ls1 ls2 : list A) : list A :=
  match ls1 with
  | nil => ls2
  | x :: ls1' => x :: app ls1' ls2
  end.

Infix "++" := app.

(*|
- Next,
  reversal:
|*)

Fixpoint rev {A} (ls : list A) : list A :=
  match ls with
  | nil => nil
  | x :: ls' => rev ls' ++ [x]
  end.

Theorem length_app : forall A (ls1 ls2 : list A),
    length (ls1 ++ ls2) = length ls1 + length ls2.
Proof.
  induction ls1; simpl in *; congruence.
Qed.

(*|
One of the classic gotchas in functional-programming class is how slow this
naive `rev` is.  Each `app` operation requires linear time, so running
linearly many `app`s brings us to quadratic time for `rev`.  Using a helper
function, we can bring `rev` to its optimal linear time.
|*)

Fixpoint rev_append {A} (ls acc : list A) : list A :=
  match ls with
  | nil => acc
  | x :: ls' => rev_append ls' (x :: acc)
  end.

(*|
This function `rev_append` takes an extra *accumulator* argument, in which we
gradually build up the original input in reversed order.  The base case just
returns the accumulator.  Now reversal just needs to do a `rev_append` with
an empty initial accumulator.
|*)

Definition rev' {A} (ls : list A) : list A :=
  rev_append ls [].

(*|
A few test destruct can help convince us that this seems to work.

.. coq:: unfold
|*)

Compute rev [1; 2; 3; 4].
Compute rev' [1; 2; 3; 4].
Compute rev ["hi"; "bye"; "sky"].
Compute rev' ["hi"; "bye"; "sky"].

(*|
OK, great.  Now it seems worth investing in a correctness proof.  We'll
discover it interactively in class, but here's a worked-out final
answer, with the several lemmas that we discover are useful.

List concatenation is associative.
|*)

Lemma app_assoc : forall A (ls1 ls2 ls3 : list A),
    (ls1 ++ ls2) ++ ls3 = ls1 ++ (ls2 ++ ls3).
Proof.
  induction ls1; simpl; intros.
  - congruence.
  - rewrite IHls1.
    congruence.
Qed.

(*|
The natural correctness condition for `rev_append`: it does what it says on
the package, combining reversal with appending!
|*)

Lemma rev_append_ok : forall A (ls acc : list A),
    rev_append ls acc = rev ls ++ acc.
Proof.
  induction ls; simpl; intros.
  - congruence.
  - rewrite IHls.
    rewrite app_assoc.
    simpl.
    congruence.
Qed.

(*|
.. note::

   The proof above would *not* have worked with the orders of
   variables `ls` and `acc` swapped in the lemma statement!  Try the proof that
   way to see what goes wrong.  The problem is that the induction hypothesis
   would be too weak.  A single `acc` value would be fixed for the whole proof,
   while we need `acc` to *vary* throughout the induction, by retaining a
   universal quantifier for it in the induction hypothesis.

Concatenating the empty list has no effect.
|*)

Lemma app_nil : forall A (ls : list A),
    ls ++ [] = ls.
Proof.
  induction ls; simpl.
  all: congruence.
Qed.

(*|
Now we can prove equivalence of `rev'` and `rev`, with no new induction.
|*)

Theorem rev'_ok : forall A (ls : list A),
    rev' ls = rev ls.
Proof.
  intros.
  unfold rev'.
  rewrite rev_append_ok.
  apply app_nil.
Qed.

(*|
Zipping and unzipping
---------------------

Another classic pair of list operations is zipping and unzipping.
These functions convert between pairs of lists and lists of pairs.
|*)

Fixpoint zip {A1 A2} (ls1 : list A1) (ls2 : list A2) : list (A1 * A2) :=
  match ls1, ls2 with
  | x1 :: ls1', x2 :: ls2' => (x1, x2) :: zip ls1' ls2'
  | _, _ => []
  end.

(*|
Note how, when passed two lengths of different lists, `zip` drops the
mismatched suffix of the longer list.

An explicit `Set` annotation is needed here, for obscure type-inference
reasons.
|*)

Fixpoint unzip {A1 A2 : Set} (ls : list (A1 * A2))
  : list A1 * list A2 :=
  match ls with
  | [] => ([], [])
  | (x1, x2) :: ls' =>
    let (ls1, ls2) := unzip ls' in
    (x1 :: ls1, x2 :: ls2)
  end.

(*|
A few common-sense properties hold of these definitions.
|*)

Theorem length_zip : forall A1 A2 (ls1 : list A1) (ls2 : list A2),
    length (zip ls1 ls2) = min (length ls1) (length ls2).
Proof.
  induction ls1; simpl; intros.
  - lia.
  - destruct ls2; simpl; intros.
    + lia.
    + rewrite IHls1.
      lia.
Qed.

(*|
We write `fst` and `snd` for the first and second projection operators on
pairs, respectively.
|*)

Theorem length_unzip1 : forall (A1 A2 : Set) (ls : list (A1 * A2)),
    length (fst (unzip ls)) = length ls.
Proof.
  induction ls; simpl; intros.
  - congruence.
  -
  destruct hd.

(*|
Note how `destruct` allows us to pull apart a pair into its two pieces.
|*)

  destruct (unzip ls).
  simpl in *.
  congruence.
Qed.

Theorem length_unzip2 : forall (A1 A2 : Set) (ls : list (A1 * A2)),
    length (snd (unzip ls)) = length ls.
Proof.
  induction ls; simpl; intros.
  - congruence.
  - destruct hd.
    destruct (unzip ls).
    simpl in *.
    congruence.
Qed.

Theorem zip_unzip : forall (A1 A2 : Set) (ls : list (A1 * A2)),
    (let (ls1, ls2) := unzip ls in zip ls1 ls2) = ls.
Proof.
  induction ls; simpl; intros.
  - congruence.
  - destruct hd.
    destruct (unzip ls).
    simpl.
    congruence.
Qed.

(*|
There are also interesting interactions with `app` and `rev`.
|*)

Theorem unzip_app : forall (A1 A2 : Set) (x y : list (A1 * A2)),
    unzip (x ++ y)
    = (let (x1, x2) := unzip x in
       let (y1, y2) := unzip y in
       (x1 ++ y1, x2 ++ y2)).
Proof.
  induction x; simpl; intros.
  - destruct (unzip y).
    congruence.
  - rewrite IHx.
    destruct hd, (unzip x), (unzip y).
    simpl in *; congruence.
Qed.

Theorem unzip_rev : forall (A1 A2 : Set) (ls : list (A1 * A2)),
    unzip (rev ls) = (let (ls1, ls2) := unzip ls in
                      (rev ls1, rev ls2)).
Proof.
  induction ls; simpl; intros.
  - congruence.
  - rewrite unzip_app, IHls; simpl.
    destruct hd, (unzip ls); simpl in *.
    congruence.
Qed.

(*|
Binary trees
============

Another classic datatype is binary trees, which we can define like so.
|*)

Inductive tree {A : Set} : Set :=
| Leaf
| Node (l : @tree A) (d : A) (r : @tree A).

Arguments tree: clear implicits.

Example tr1 : tree nat :=
  Node (Node Leaf 7 Leaf) 8 (Node Leaf 9 (Node Leaf 10 Leaf)).

(*|
There is a natural notion of size of a tree.
|*)

Fixpoint size {A} (t : tree A) : nat :=
  match t with
  | Leaf => 0
  | Node l _ r => 1 + size l + size r
  end.

(*|
There is also a natural sense of reversing a tree, flipping it around its
vertical axis.
|*)

Fixpoint reverse {A} (t : tree A) : tree A :=
  match t with
  | Leaf => Leaf
  | Node l d r => Node (reverse r) d (reverse l)
  end.

(*|
There is a natural relationship between the two.
|*)

Theorem size_reverse : forall A (t : tree A),
    size (reverse t) = size t.
Proof.
  induction t; simpl; (congruence || lia).
Qed.

(*|
Another classic tree operation is flattening into lists.
|*)

Fixpoint flatten {A} (t : tree A) : list A :=
  match t with
  | Leaf => []
  | Node l d r => flatten l ++ d :: flatten r
  end.

(*|
Note here that operators `++` and `::` are right-associative.
|*)

Theorem length_flatten : forall A (t : tree A),
    length (flatten t) = size t.
Proof.
  induction t; simpl; intros.
  - congruence.
  - rewrite length_app; simpl in *; lia.
Qed.

Lemma rev_app : forall A (ls1 ls2 : list A),
    rev (ls1 ++ ls2) = rev ls2 ++ rev ls1.
Proof.
  induction ls1; simpl; intros.
  - rewrite app_nil.
    congruence.
  - rewrite IHls1.
    apply app_assoc.
Qed.

Theorem rev_flatten : forall A (t : tree A),
    rev (flatten t) = flatten (reverse t).
Proof.
  induction t; simpl; intros.
  - congruence.
  - rewrite rev_app; simpl in *.
    rewrite app_assoc; simpl in *.
    congruence.
Qed.

(*|
Syntax trees
============

Trees are particularly important to us in studying program proof, since it is
natural to represent programs as *syntax trees*.  Here's a quick example, for
a tiny imperative language.
|*)

Notation var := string.

Inductive expression : Set :=
| Const (n : nat)
| Var (x : var)
| Plus (e1 e2 : expression)
| Minus (e1 e2 : expression)
| Times (e1 e2 : expression)
| GreaterThan (e1 e2 : expression)
| Not (e : expression).

Inductive statement : Set :=
| Assign (x : var) (e : expression)
| Sequence (s1 s2 : statement)
| IfThenElse (e : expression) (s1 s2 : statement)
| WhileLoop (e : expression) (s : statement).

(*|
First, here's a quick sample of nifty notations to write
almost-natural-looking embedded programs in Coq.
|*)

Declare Custom Entry prog.

Coercion Const : nat >-> expression.
Coercion Var : string >-> expression.

Infix "+" := Plus (in custom prog at level 50).
Infix "-" := Minus (in custom prog at level 50).
Infix "*" := Times  (in custom prog at level 40).
Infix ">" := GreaterThan (in custom prog at level 70).
Infix ":=" := Assign (in custom prog at level 75).
Infix ";" := Sequence (in custom prog at level 76).
Notation "'if'  ( e )  { s1 } 'else' { s2 }" :=
  (IfThenElse e s1 s2) (in custom prog at level 75).
Notation "'while'  ( e )  { s }" :=
  (WhileLoop e s) (in custom prog at level 75).
Notation "c" :=
  c (in custom prog at level 1, c constr at level 0).

Notation "[[ e ]]" := e (e custom prog).

Example factorial := [[
      "answer" := 1;
      while ("input" > 0) {
          "answer" := "answer" * "input";
          "input" := "input" - 1
        }
  ]].

(*|
A variety of compiler-style operations can be coded on top of this type.
Here's one to count total variable occurrences.
|*)

Fixpoint varsInExpression (e : expression) : nat :=
  match e with
  | Const _ => 0
  | Var _ => 1
  | Plus e1 e2
  | Minus e1 e2
  | Times e1 e2
  | GreaterThan e1 e2 =>
      varsInExpression e1 + varsInExpression e2
  | Not e1 => varsInExpression e1
  end.

Fixpoint varsInStatement (s : statement) : nat :=
  match s with
  | Assign _ e => 1 + varsInExpression e
  | Sequence s1 s2 =>
      varsInStatement s1 + varsInStatement s2
  | IfThenElse e s1 s2 =>
      varsInExpression e + varsInStatement s1 + varsInStatement s2
  | WhileLoop e s1 =>
      varsInExpression e + varsInStatement s1
  end.

(*|
We will need to wait for a few more lectures' worth of conceptual progress
before we can prove that transformations on programs preserve meaning, but we
do already have enough tools that prove that transformations preserve more
basic properties, like number of variables.  Here's one such transformation,
which flips "then" and "else" cases while also negating "if" conditions.
|*)

Fixpoint flipper (s : statement) : statement :=
  match s with
  | Assign _ _ => s
  | Sequence s1 s2 =>
      Sequence (flipper s1) (flipper s2)
  | IfThenElse e s1 s2 =>
      IfThenElse (Not e) (flipper s2) (flipper s1)
  | WhileLoop e s1 =>
      WhileLoop e (flipper s1)
  end.

Theorem varsIn_flipper : forall s,
    varsInStatement (flipper s) = varsInStatement s.
Proof.
  induction s; simpl; intros.
  all: congruence || lia.
Qed.

(*|
Just for the sheer madcap fun of it, let's write some translations of
programs into our lists from before, with variables as data values.
|*)

Fixpoint listifyExpression (e : expression) : list var :=
  match e with
  | Const _ => []
  | Var x => [x]
  | Plus e1 e2
  | Minus e1 e2
  | Times e1 e2
  | GreaterThan e1 e2 => listifyExpression e1 ++ listifyExpression e2
  | Not e1 => listifyExpression e1
  end.

Fixpoint listifyStatement (s : statement) : list var :=
  match s with
  | Assign x e => x :: listifyExpression e
  | Sequence s1 s2 =>
      listifyStatement s1 ++ listifyStatement s2
  | IfThenElse e s1 s2 =>
      listifyExpression e ++ listifyStatement s1 ++ listifyStatement s2
  | WhileLoop e s1 =>
      listifyExpression e ++ listifyStatement s1
  end.

Compute listifyStatement factorial. (* .unfold *)

(*|
At this point, I can't resist switching to a more automated proof style,
though still a pretty tame one.
|*)

#[local] Hint Rewrite length_app : poly.

Lemma length_listifyExpression : forall e,
    length (listifyExpression e) = varsInExpression e.
Proof.
  induction e; simpl; autorewrite with poly; lia.
Qed.

#[local] Hint Rewrite length_listifyExpression : poly.

Theorem length_listifyStatement : forall s,
    length (listifyStatement s) = varsInStatement s.
Proof.
  induction s; simpl; autorewrite with poly; lia.
Qed.

(*|
Other transformations are also possible, like the Swedish-Chef optimization,
which turns every variable into "bork".  It saves many bits when most variable
names are longer than 4 characters.
|*)

Fixpoint swedishExpression (e : expression) : expression :=
  match e with
  | Const _ => e
  | Var _ => Var "bork"
  | Plus e1 e2 =>
      Plus (swedishExpression e1) (swedishExpression e2)
  | Minus e1 e2 =>
      Minus (swedishExpression e1) (swedishExpression e2)
  | Times e1 e2 =>
      Times (swedishExpression e1) (swedishExpression e2)
  | GreaterThan e1 e2 =>
      GreaterThan (swedishExpression e1) (swedishExpression e2)
  | Not e1 =>
      Not (swedishExpression e1)
  end.

Fixpoint swedishStatement (s : statement) : statement :=
  match s with
  | Assign _ e => Assign "bork" (swedishExpression e)
  | Sequence s1 s2 =>
      Sequence (swedishStatement s1) (swedishStatement s2)
  | IfThenElse e s1 s2 =>
      IfThenElse (swedishExpression e)
        (swedishStatement s1) (swedishStatement s2)
  | WhileLoop e s1 =>
      WhileLoop (swedishExpression e) (swedishStatement s1)
  end.

Compute swedishStatement factorial. (* .unfold *)

Fixpoint swedishList (ls : list var) : list var :=
  match ls with
  | [] => []
  | _ :: ls => "bork" :: swedishList ls
  end.

Lemma swedishList_app : forall ls1 ls2,
    swedishList (ls1 ++ ls2) = swedishList ls1 ++ swedishList ls2.
Proof.
  induction ls1; simpl in *; congruence.
Qed.

#[local] Hint Rewrite swedishList_app : poly.

Lemma listifyExpression_swedishExpression : forall e,
    listifyExpression (swedishExpression e) =
      swedishList (listifyExpression e).
Proof.
  induction e; simpl in *; autorewrite with poly; congruence.
Qed.

#[local] Hint Rewrite listifyExpression_swedishExpression : poly.

Lemma listifyStatement_swedishStatement : forall s,
    listifyStatement (swedishStatement s) =
      swedishList (listifyStatement s).
Proof.
  induction s; simpl in *; autorewrite with poly; congruence.
Qed.