src/Tools/quickcheck.ML
author wenzelm
Sat, 08 Jan 2011 17:14:48 +0100
changeset 41720 f6ab14e61604
parent 41334 b4cccce25d9a
child 41765 7267fb5b724b
permissions -rw-r--r--
misc tuning and comments based on review of Theory_Data, Proof_Data, Generic_Data usage;
     1 (*  Title:      Tools/quickcheck.ML
     2     Author:     Stefan Berghofer, Florian Haftmann, Lukas Bulwahn, TU Muenchen
     3 
     4 Generic counterexample search engine.
     5 *)
     6 
     7 signature QUICKCHECK =
     8 sig
     9   val setup: theory -> theory
    10   (* configuration *)
    11   val auto: bool Unsynchronized.ref
    12   val timing : bool Unsynchronized.ref
    13   val tester : string Config.T 
    14   val size : int Config.T
    15   val iterations : int Config.T
    16   val no_assms : bool Config.T
    17   val report : bool Config.T
    18   val quiet : bool Config.T
    19   val timeout : real Config.T
    20   val finite_types : bool Config.T
    21   val finite_type_size : int Config.T
    22   datatype report = Report of
    23     { iterations : int, raised_match_errors : int,
    24       satisfied_assms : int list, positive_concl_tests : int }
    25   datatype expectation = No_Expectation | No_Counterexample | Counterexample;  
    26   datatype test_params = Test_Params of {default_type: typ list, expect : expectation};
    27   val test_params_of : Proof.context -> test_params
    28   val map_test_params : (typ list * expectation -> typ list * expectation)
    29     -> Context.generic -> Context.generic
    30   val add_generator:
    31     string * (Proof.context -> term -> int -> term list option * report option)
    32       -> Context.generic -> Context.generic
    33   (* testing terms and proof states *)
    34   val test_term: Proof.context -> bool -> term ->
    35     (string * term) list option * ((string * int) list * ((int * report) list) option)
    36   val test_goal_terms:
    37     Proof.context -> bool -> (string * typ) list -> term list
    38       -> (string * term) list option * ((string * int) list * ((int * report) list) option) list
    39   val quickcheck: (string * string list) list -> int -> Proof.state -> (string * term) list option
    40 end;
    41 
    42 structure Quickcheck : QUICKCHECK =
    43 struct
    44 
    45 (* preferences *)
    46 
    47 val auto = Unsynchronized.ref false;
    48 
    49 val timing = Unsynchronized.ref false;
    50 
    51 val _ =
    52   ProofGeneralPgip.add_preference Preferences.category_tracing
    53   (Unsynchronized.setmp auto true (fn () =>
    54     Preferences.bool_pref auto
    55       "auto-quickcheck"
    56       "Run Quickcheck automatically.") ());
    57 
    58 (* quickcheck report *)
    59 
    60 datatype report = Report of
    61   { iterations : int, raised_match_errors : int,
    62     satisfied_assms : int list, positive_concl_tests : int }
    63 
    64 (* expectation *)
    65 
    66 datatype expectation = No_Expectation | No_Counterexample | Counterexample; 
    67 
    68 fun merge_expectation (expect1, expect2) =
    69   if expect1 = expect2 then expect1 else No_Expectation
    70 
    71 (* quickcheck configuration -- default parameters, test generators *)
    72 val (tester, setup_tester) = Attrib.config_string "quickcheck_tester" (K "")
    73 val (size, setup_size) = Attrib.config_int "quickcheck_size" (K 10)
    74 val (iterations, setup_iterations) = Attrib.config_int "quickcheck_iterations" (K 100)
    75 val (no_assms, setup_no_assms) = Attrib.config_bool "quickcheck_no_assms" (K false)
    76 val (report, setup_report) = Attrib.config_bool "quickcheck_report" (K true)
    77 val (quiet, setup_quiet) = Attrib.config_bool "quickcheck_quiet" (K false)
    78 val (timeout, setup_timeout) = Attrib.config_real "quickcheck_timeout" (K 30.0)
    79 val (finite_types, setup_finite_types) = Attrib.config_bool "quickcheck_finite_types" (K true)
    80 val (finite_type_size, setup_finite_type_size) = Attrib.config_int "quickcheck_finite_type_size" (K 3)
    81 
    82 val setup_config =
    83   setup_tester #> setup_size #> setup_iterations #> setup_no_assms #> setup_report #> setup_quiet
    84     #> setup_timeout #> setup_finite_types #> setup_finite_type_size
    85 
    86 datatype test_params = Test_Params of
    87   {default_type: typ list, expect : expectation};
    88 
    89 fun dest_test_params (Test_Params {default_type, expect}) = (default_type, expect);
    90 
    91 fun make_test_params (default_type, expect) = Test_Params {default_type = default_type, expect = expect};
    92 
    93 fun map_test_params' f (Test_Params {default_type, expect}) = make_test_params (f (default_type, expect));
    94 
    95 fun merge_test_params
    96   (Test_Params {default_type = default_type1, expect = expect1},
    97     Test_Params {default_type = default_type2, expect = expect2}) =
    98   make_test_params
    99     (merge (op =) (default_type1, default_type2), merge_expectation (expect1, expect2));
   100 
   101 structure Data = Generic_Data
   102 (
   103   type T =
   104     (string * (Proof.context -> term -> int -> term list option * report option)) list
   105       * test_params;
   106   val empty = ([], Test_Params {default_type = [], expect = No_Expectation});
   107   val extend = I;
   108   fun merge ((generators1, params1), (generators2, params2)) : T =
   109     (AList.merge (op =) (K true) (generators1, generators2),
   110       merge_test_params (params1, params2));
   111 );
   112 
   113 val test_params_of = snd o Data.get o Context.Proof;
   114 
   115 val default_type = fst o dest_test_params o test_params_of
   116 
   117 val expect = snd o dest_test_params o test_params_of
   118 
   119 val map_test_params = Data.map o apsnd o map_test_params'
   120 
   121 val add_generator = Data.map o apfst o AList.update (op =);
   122 
   123 (* generating tests *)
   124 
   125 fun mk_tester ctxt t =
   126   let
   127     val name = Config.get ctxt tester
   128     val tester = case AList.lookup (op =) ((fst o Data.get o Context.Proof) ctxt) name
   129       of NONE => error ("No such quickcheck tester: " ^ name)
   130       | SOME tester => tester ctxt;
   131   in
   132     if Config.get ctxt quiet then
   133       try tester t
   134     else
   135       let
   136         val tester = Exn.interruptible_capture tester t
   137       in case Exn.get_result tester of
   138           NONE => SOME (Exn.release tester)
   139         | SOME tester => SOME tester
   140       end
   141   end
   142 
   143 (* testing propositions *)
   144 
   145 fun prep_test_term t =
   146   let
   147     val _ = (null (Term.add_tvars t []) andalso null (Term.add_tfrees t [])) orelse
   148       error "Term to be tested contains type variables";
   149     val _ = null (Term.add_vars t []) orelse
   150       error "Term to be tested contains schematic variables";
   151     val frees = Term.add_frees t [];
   152   in (frees, list_abs_free (frees, t)) end
   153 
   154 fun cpu_time description f =
   155   let
   156     val start = start_timing ()
   157     val result = Exn.capture f ()
   158     val time = Time.toMilliseconds (#cpu (end_timing start))
   159   in (Exn.release result, (description, time)) end
   160   
   161 fun test_term ctxt is_interactive t =
   162   let
   163     val (names, t') = apfst (map fst) (prep_test_term t);
   164     val current_size = Unsynchronized.ref 0
   165     fun excipit s =
   166       "Quickcheck " ^ s ^ " while testing at size " ^ string_of_int (!current_size)
   167     val (test_fun, comp_time) = cpu_time "quickcheck compilation" (fn () => mk_tester ctxt t');
   168     fun with_size test_fun k reports =
   169       if k > Config.get ctxt size then
   170         (NONE, reports)
   171       else
   172         let
   173           val _ = if Config.get ctxt quiet then () else Output.urgent_message
   174             ("Test data size: " ^ string_of_int k)
   175           val _ = current_size := k
   176           val ((result, new_report), timing) =
   177             cpu_time ("size " ^ string_of_int k) (fn () => test_fun (k - 1))
   178           val reports = case new_report of NONE => reports | SOME rep => (k, rep) :: reports
   179         in case result of NONE => with_size test_fun (k + 1) reports | SOME q => (SOME q, reports) end;
   180   in
   181     case test_fun of NONE => (NONE, ([comp_time], NONE)) 
   182       | SOME test_fun =>
   183         TimeLimit.timeLimit (seconds (Config.get ctxt timeout)) (fn () =>
   184           let
   185             val ((result, reports), exec_time) =
   186               cpu_time "quickcheck execution" (fn () => with_size test_fun 1 [])
   187           in
   188             (case result of NONE => NONE | SOME ts => SOME (names ~~ ts),
   189               ([exec_time, comp_time],
   190                if Config.get ctxt report andalso not (null reports) then SOME reports else NONE))
   191           end) ()
   192         handle TimeLimit.TimeOut =>
   193           if is_interactive then error (excipit "ran out of time") else raise TimeLimit.TimeOut
   194   end;
   195 
   196 (* FIXME: this function shows that many assumptions are made upon the generation *)
   197 (* In the end there is probably no generic quickcheck interface left... *)
   198 
   199 fun test_term_with_increasing_cardinality ctxt is_interactive ts =
   200   let
   201     val thy = ProofContext.theory_of ctxt
   202     val (freess, ts') = split_list (map prep_test_term ts)
   203     val Ts = map snd (hd freess)
   204     val (test_funs, comp_time) = cpu_time "quickcheck compilation"
   205       (fn () => map (mk_tester ctxt) ts')
   206     fun test_card_size (card, size) =
   207       (* FIXME: why decrement size by one? *)
   208       case fst (the (nth test_funs (card - 1)) (size - 1)) of
   209         SOME ts => SOME (map fst (nth freess (card - 1)) ~~ ts)
   210       | NONE => NONE
   211     val enumeration_card_size =
   212       if forall (fn T => Sign.of_sort thy (T,  ["Enum.enum"])) Ts then
   213         (* size does not matter *)
   214         map (rpair 0) (1 upto (length ts))
   215       else
   216         (* size does matter *)
   217         map_product pair (1 upto (length ts)) (1 upto (Config.get ctxt size))
   218         |> sort (fn ((c1, s1), (c2, s2)) => int_ord ((c1 + s1), (c2 + s2)))
   219     in
   220       if (forall is_none test_funs) then
   221         (NONE, ([comp_time], NONE))
   222       else if (forall is_some test_funs) then
   223         TimeLimit.timeLimit (seconds (Config.get ctxt timeout)) (fn () =>
   224           (get_first test_card_size enumeration_card_size, ([comp_time], NONE))) ()
   225         handle TimeLimit.TimeOut =>
   226           if is_interactive then error ("Quickcheck ran out of time") else raise TimeLimit.TimeOut
   227       else
   228         error "Unexpectedly, testers of some cardinalities are executable but others are not"       
   229     end
   230 
   231 fun get_finite_types ctxt =
   232   fst (chop (Config.get ctxt finite_type_size)
   233     (map (Type o rpair []) ["Enum.finite_1", "Enum.finite_2", "Enum.finite_3",
   234      "Enum.finite_4", "Enum.finite_5"]))  
   235 
   236 exception WELLSORTED of string
   237 
   238 fun monomorphic_term thy insts default_T = 
   239   let
   240     fun subst (T as TFree (v, S)) =
   241       let
   242         val T' = AList.lookup (op =) insts v
   243           |> the_default default_T
   244       in if Sign.of_sort thy (T', S) then T'
   245         else raise (WELLSORTED ("For instantiation with default_type " ^ Syntax.string_of_typ_global thy default_T ^
   246           ":\n" ^ Syntax.string_of_typ_global thy T' ^
   247           " to be substituted for variable " ^
   248           Syntax.string_of_typ_global thy T ^ " does not have sort " ^
   249           Syntax.string_of_sort_global thy S))
   250       end
   251       | subst T = T;
   252   in (map_types o map_atyps) subst end;
   253 
   254 datatype wellsorted_error = Wellsorted_Error of string | Term of term
   255 
   256 fun test_goal_terms lthy is_interactive insts check_goals =
   257   let
   258     val thy = ProofContext.theory_of lthy 
   259     val default_insts =
   260       if Config.get lthy finite_types then (get_finite_types lthy) else (default_type lthy)
   261     val inst_goals =
   262       map (fn check_goal =>
   263         if not (null (Term.add_tfree_names check_goal [])) then
   264           map (fn T =>
   265             (pair (SOME T) o Term o Object_Logic.atomize_term thy o monomorphic_term thy insts T) check_goal
   266               handle WELLSORTED s => (SOME T, Wellsorted_Error s)) default_insts
   267         else
   268           [(NONE, Term (Object_Logic.atomize_term thy check_goal))]) check_goals
   269     val error_msg = cat_lines (maps (map_filter (fn (_, Term t) => NONE | (_, Wellsorted_Error s) => SOME s)) inst_goals)
   270     fun is_wellsorted_term (T, Term t) = SOME (T, t)
   271       | is_wellsorted_term (_, Wellsorted_Error s) = NONE
   272     val correct_inst_goals =
   273       case map (map_filter is_wellsorted_term) inst_goals of
   274         [[]] => error error_msg
   275       | xs => xs
   276     val _ = if Config.get lthy quiet then () else warning error_msg
   277     fun collect_results f reports [] = (NONE, rev reports)
   278       | collect_results f reports (t :: ts) =
   279         case f t of
   280           (SOME res, report) => (SOME res, rev (report :: reports))
   281         | (NONE, report) => collect_results f (report :: reports) ts
   282     fun test_term' goal =
   283       case goal of
   284         [(NONE, t)] => test_term lthy is_interactive t
   285       | ts => test_term_with_increasing_cardinality lthy is_interactive (map snd ts)                           
   286   in
   287     if Config.get lthy finite_types then
   288       collect_results test_term' [] correct_inst_goals
   289     else
   290       collect_results (test_term lthy is_interactive) [] (maps (map snd) correct_inst_goals)
   291   end;
   292 
   293 fun test_goal insts i state =
   294   let
   295     val lthy = Proof.context_of state;
   296     val thy = Proof.theory_of state;
   297     fun strip (Const ("all", _) $ Abs (_, _, t)) = strip t
   298       | strip t = t;
   299     val {goal = st, ...} = Proof.raw_goal state;
   300     val (gi, frees) = Logic.goal_params (prop_of st) i;
   301     val some_locale = case (Option.map #target o Named_Target.peek) lthy
   302      of NONE => NONE
   303       | SOME "" => NONE
   304       | SOME locale => SOME locale;
   305     val assms = if Config.get lthy no_assms then [] else case some_locale
   306      of NONE => Assumption.all_assms_of lthy
   307       | SOME locale => Assumption.local_assms_of lthy (Locale.init locale thy);
   308     val proto_goal = Logic.list_implies (map Thm.term_of assms, subst_bounds (frees, strip gi));
   309     val check_goals = case some_locale
   310      of NONE => [proto_goal]
   311       | SOME locale => map (fn (_, phi) => Morphism.term phi proto_goal)
   312         (Locale.registrations_of (Context.Theory thy) (*FIXME*) locale);
   313   in
   314     test_goal_terms lthy true insts check_goals
   315   end
   316 
   317 (* pretty printing *)
   318 
   319 fun tool_name auto = (if auto then "Auto " else "") ^ "Quickcheck"
   320 
   321 fun pretty_counterex ctxt auto NONE = Pretty.str (tool_name auto ^ " found no counterexample.")
   322   | pretty_counterex ctxt auto (SOME cex) =
   323       Pretty.chunks (Pretty.str (tool_name auto ^ " found a counterexample:\n") ::
   324         map (fn (s, t) =>
   325           Pretty.block [Pretty.str (s ^ " ="), Pretty.brk 1, Syntax.pretty_term ctxt t]) (rev cex));
   326 
   327 fun pretty_report (Report {iterations = iterations, raised_match_errors = raised_match_errors,
   328     satisfied_assms = satisfied_assms, positive_concl_tests = positive_concl_tests}) =
   329   let
   330     fun pretty_stat s i = Pretty.block ([Pretty.str (s ^ ": " ^ string_of_int i)])
   331   in
   332      ([pretty_stat "iterations" iterations,
   333      pretty_stat "match exceptions" raised_match_errors]
   334      @ map_index (fn (i, n) => pretty_stat ("satisfied " ^ string_of_int (i + 1) ^ ". assumption") n)
   335        satisfied_assms
   336      @ [pretty_stat "positive conclusion tests" positive_concl_tests])
   337   end
   338 
   339 fun pretty_reports ctxt (SOME reports) =
   340   Pretty.chunks (Pretty.fbrk :: Pretty.str "Quickcheck report:" ::
   341     maps (fn (size, report) =>
   342       Pretty.str ("size " ^ string_of_int size ^ ":") :: pretty_report report @ [Pretty.brk 1])
   343       (rev reports))
   344   | pretty_reports ctxt NONE = Pretty.str ""
   345 
   346 fun pretty_counterex_and_reports ctxt auto (cex, timing_and_reports) =
   347   Pretty.chunks (pretty_counterex ctxt auto cex ::
   348     map (pretty_reports ctxt) (map snd timing_and_reports))
   349 
   350 (* automatic testing *)
   351 
   352 fun auto_quickcheck state =
   353   let
   354     val ctxt = Proof.context_of state;
   355     val res =
   356       state
   357       |> Proof.map_context (Config.put report false #> Config.put quiet true)
   358       |> try (test_goal [] 1);
   359   in
   360     case res of
   361       NONE => (false, state)
   362     | SOME (NONE, report) => (false, state)
   363     | SOME (cex, report) => (true, Proof.goal_message (K (Pretty.chunks [Pretty.str "",
   364         Pretty.mark Markup.hilite (pretty_counterex ctxt true cex)])) state)
   365   end
   366 
   367 val setup = Auto_Tools.register_tool (auto, auto_quickcheck)
   368   #> setup_config
   369 
   370 (* Isar commands *)
   371 
   372 fun read_nat s = case (Library.read_int o Symbol.explode) s
   373  of (k, []) => if k >= 0 then k
   374       else error ("Not a natural number: " ^ s)
   375   | (_, _ :: _) => error ("Not a natural number: " ^ s);
   376 
   377 fun read_bool "false" = false
   378   | read_bool "true" = true
   379   | read_bool s = error ("Not a Boolean value: " ^ s)
   380 
   381 fun read_real s =
   382   case (Real.fromString s) of
   383     SOME s => s
   384   | NONE => error ("Not a real number: " ^ s)
   385 
   386 fun read_expectation "no_expectation" = No_Expectation
   387   | read_expectation "no_counterexample" = No_Counterexample 
   388   | read_expectation "counterexample" = Counterexample
   389   | read_expectation s = error ("Not an expectation value: " ^ s)  
   390 
   391 fun valid_tester_name genctxt = AList.defined (op =) (fst (Data.get genctxt))
   392 
   393 fun parse_tester name genctxt =
   394   if valid_tester_name genctxt name then
   395     Config.put_generic tester name genctxt
   396   else
   397     error ("Unknown tester: " ^ name)
   398 
   399 fun parse_test_param ("tester", [arg]) = parse_tester arg
   400   | parse_test_param ("size", [arg]) = Config.put_generic size (read_nat arg)
   401   | parse_test_param ("iterations", [arg]) = Config.put_generic iterations (read_nat arg)
   402   | parse_test_param ("default_type", arg) = (fn gen_ctxt =>
   403     map_test_params ((apfst o K) (map (ProofContext.read_typ (Context.proof_of gen_ctxt)) arg)) gen_ctxt)
   404   | parse_test_param ("no_assms", [arg]) = Config.put_generic no_assms (read_bool arg)
   405   | parse_test_param ("expect", [arg]) = map_test_params ((apsnd o K) (read_expectation arg))
   406   | parse_test_param ("report", [arg]) = Config.put_generic report (read_bool arg)
   407   | parse_test_param ("quiet", [arg]) = Config.put_generic quiet (read_bool arg)
   408   | parse_test_param ("timeout", [arg]) = Config.put_generic timeout (read_real arg)
   409   | parse_test_param ("finite_types", [arg]) = Config.put_generic finite_types (read_bool arg)
   410   | parse_test_param ("finite_type_size", [arg]) = Config.put_generic finite_type_size (read_nat arg)
   411   | parse_test_param (name, _) = fn genctxt =>
   412     if valid_tester_name genctxt name then
   413       Config.put_generic tester name genctxt
   414     else error ("Unknown tester or test parameter: " ^ name);
   415 
   416 fun parse_test_param_inst (name, arg) (insts, ctxt) =
   417       case try (ProofContext.read_typ ctxt) name
   418        of SOME (TFree (v, _)) => (apfst o AList.update (op =))
   419               (v, ProofContext.read_typ ctxt (the_single arg)) (insts, ctxt)
   420         | _ => (apsnd o Context.proof_map o parse_test_param) (name, arg) (insts, ctxt);
   421 
   422 fun quickcheck_params_cmd args = Context.theory_map (fold parse_test_param args);
   423   
   424 fun gen_quickcheck args i state =
   425   state
   426   |> Proof.map_context_result (fn ctxt => fold parse_test_param_inst args ([], ctxt))
   427   |> (fn (insts, state') => test_goal insts i state'
   428   |> tap (fn (SOME x, _) => if expect (Proof.context_of state') = No_Counterexample then
   429                error ("quickcheck expected to find no counterexample but found one") else ()
   430            | (NONE, _) => if expect (Proof.context_of state') = Counterexample then
   431                error ("quickcheck expected to find a counterexample but did not find one") else ()))
   432 
   433 fun quickcheck args i state = fst (gen_quickcheck args i state);
   434 
   435 fun quickcheck_cmd args i state =
   436   gen_quickcheck args i (Toplevel.proof_of state)
   437   |> Pretty.writeln o pretty_counterex_and_reports (Toplevel.context_of state) false;
   438 
   439 val parse_arg = Parse.name -- (Scan.optional (Parse.$$$ "=" |-- 
   440   (((Parse.name || Parse.float_number) >> single) || (Parse.$$$ "[" |-- Parse.list1 Parse.name --| Parse.$$$ "]"))) ["true"]);
   441 
   442 val parse_args = Parse.$$$ "[" |-- Parse.list1 parse_arg --| Parse.$$$ "]"
   443   || Scan.succeed [];
   444 
   445 val _ =
   446   Outer_Syntax.command "quickcheck_params" "set parameters for random testing" Keyword.thy_decl
   447     (parse_args >> (fn args => Toplevel.theory (quickcheck_params_cmd args)));
   448 
   449 val _ =
   450   Outer_Syntax.improper_command "quickcheck" "try to find counterexample for subgoal" Keyword.diag
   451     (parse_args -- Scan.optional Parse.nat 1
   452       >> (fn (args, i) => Toplevel.no_timing o Toplevel.keep (quickcheck_cmd args i)));
   453 
   454 end;
   455 
   456 
   457 val auto_quickcheck = Quickcheck.auto;