src/Tools/quickcheck.ML
author bulwahn
Fri, 03 Dec 2010 08:40:47 +0100
changeset 41147 1332f6e856b9
parent 40904 36ca3fad1f31
child 41148 b3db5697aab4
permissions -rw-r--r--
corrected indentation
     1 (*  Title:      Tools/quickcheck.ML
     2     Author:     Stefan Berghofer, Florian Haftmann, Lukas Bulwahn, TU Muenchen
     3 
     4 Generic counterexample search engine.
     5 *)
     6 
     (* Public interface of the generic counterexample search engine. *)
     signature QUICKCHECK =
     sig
       val setup : theory -> theory

       (* configuration: global preferences and context-dependent options *)
       val auto : bool Unsynchronized.ref
       val timing : bool Unsynchronized.ref
       val size : int Config.T
       val iterations : int Config.T
       val no_assms : bool Config.T
       val report : bool Config.T
       val quiet : bool Config.T
       val timeout : real Config.T
       val finite_types : bool Config.T
       val finite_type_size : int Config.T

       (* statistics accumulated over the test runs of one generator *)
       datatype report =
         Report of
           {iterations : int,
            raised_match_errors : int,
            satisfied_assms : int list,
            positive_concl_tests : int}

       (* outcome the user expects from a quickcheck invocation *)
       datatype expectation = No_Expectation | No_Counterexample | Counterexample

       (* test parameters stored in the context data *)
       datatype test_params = Test_Params of {default_type : typ list, expect : expectation}
       val test_params_of : Proof.context -> test_params
       val map_test_params :
         (typ list * expectation -> typ list * expectation) ->
           Context.generic -> Context.generic

       (* registration of a named generator *)
       val add_generator :
         string * (Proof.context -> term -> int -> term list option * (bool list * bool)) ->
           Context.generic -> Context.generic

       (* testing terms and proof states *)
       val test_term_small :
         Proof.context -> term ->
           (string * term) list option *
             ((string * int) list * ((int * report list) list) option)
       val test_term :
         Proof.context -> string option -> term ->
           (string * term) list option *
             ((string * int) list * ((int * report list) list) option)
       val test_goal_terms :
         Proof.context -> string option * (string * typ) list -> term list ->
           (string * term) list option *
             ((string * int) list * ((int * report list) list) option) list
       val quickcheck :
         (string * string list) list -> int -> Proof.state -> (string * term) list option
     end;
    42 
    43 structure Quickcheck : QUICKCHECK =
    44 struct
    45 
    46 (* preferences *)
    47 
    (* Global switch: when set, auto_quickcheck runs on proof states. *)
    val auto = Unsynchronized.ref false;

    (* Global timing flag; it is only declared and exported in this file. *)
    val timing = Unsynchronized.ref false;

    (* Register the "auto-quickcheck" Proof General preference for the auto flag.
       The preference is constructed while auto is temporarily set to true via
       Unsynchronized.setmp -- presumably so that "enabled" is recorded as its
       default value; confirm against Preferences.bool_pref. *)
    val _ =
      let
        fun auto_pref () =
          Preferences.bool_pref auto
            "auto-quickcheck"
            "Run Quickcheck automatically.";
      in
        ProofGeneralPgip.add_preference Preferences.category_tracing
          (Unsynchronized.setmp auto true auto_pref ())
      end;
    58 
    59 (* quickcheck report *)
    60 
    (* Outcome of one test execution: either a regular run, carrying one flag per
       assumption plus a flag for the conclusion, or a run aborted by exception Match. *)
    datatype single_report =
        Run of bool list * bool
      | MatchExc

    (* Statistics accumulated over a sequence of test executions. *)
    datatype report =
      Report of
        {iterations : int,
         raised_match_errors : int,
         satisfied_assms : int list,
         positive_concl_tests : int}
    66 
    (* Folds the outcome of a single test execution into an accumulated report.

       Every outcome increments the iteration count.  MatchExc additionally
       increments raised_match_errors.  Run (assms, concl) increments the counter
       of each assumption whose flag is set and, if concl is set, the count of
       positive conclusion tests.  map2 is applied to the flags and the counters,
       so both lists are expected to have the same length. *)
    fun collect_single_report MatchExc
          (Report {iterations, raised_match_errors, satisfied_assms, positive_concl_tests}) =
          Report
            {iterations = iterations + 1,
             raised_match_errors = raised_match_errors + 1,
             satisfied_assms = satisfied_assms,
             positive_concl_tests = positive_concl_tests}
      | collect_single_report (Run (assms, concl))
          (Report {iterations, raised_match_errors, satisfied_assms, positive_concl_tests}) =
          let
            (* an empty counter list is replaced by one zero per assumption *)
            val counters =
              if null satisfied_assms then replicate (length assms) 0 else satisfied_assms
            fun bump hit count = if hit then count + 1 else count
          in
            Report
              {iterations = iterations + 1,
               raised_match_errors = raised_match_errors,
               satisfied_assms = map2 bump assms counters,
               positive_concl_tests =
                 if concl then positive_concl_tests + 1 else positive_concl_tests}
          end
    80 
    81 (* expectation *)
    82 
    (* What the user expects a quickcheck invocation to find. *)
    datatype expectation = No_Expectation | No_Counterexample | Counterexample;

    (* Merges two expectations: agreeing values are kept, conflicting ones
       collapse to No_Expectation. *)
    fun merge_expectation (expect1, expect2) =
      if expect1 <> expect2 then No_Expectation else expect1
    87 
    88 (* quickcheck configuration -- default parameters, test generators *)
    (* Each option is declared under its attribute name together with a constant
       function supplying its initial value; the second component of every pair
       is the theory setup that belongs to the option. *)

    (* upper bound of the test data size loop *)
    val (size, setup_size) =
      Attrib.config_int "quickcheck_size" (K 10)

    (* number of test runs per size for iterable generators *)
    val (iterations, setup_iterations) =
      Attrib.config_int "quickcheck_iterations" (K 100)

    (* when set, context assumptions are left out of the tested goal *)
    val (no_assms, setup_no_assms) =
      Attrib.config_bool "quickcheck_no_assms" (K false)

    (* when set, the collected reports are returned with the result *)
    val (report, setup_report) =
      Attrib.config_bool "quickcheck_report" (K true)

    (* when set, progress messages and warnings are suppressed *)
    val (quiet, setup_quiet) =
      Attrib.config_bool "quickcheck_quiet" (K false)

    (* time limit for test execution, passed through `seconds` *)
    val (timeout, setup_timeout) =
      Attrib.config_real "quickcheck_timeout" (K 30.0)

    (* when set, type variables are instantiated with the Enum.finite_n types *)
    val (finite_types, setup_finite_types) =
      Attrib.config_bool "quickcheck_finite_types" (K true)

    (* how many of the Enum.finite_n types are tried *)
    val (finite_type_size, setup_finite_type_size) =
      Attrib.config_int "quickcheck_finite_type_size" (K 3)

    (* Combined theory setup of all options above, applied in declaration order. *)
    val setup_config =
      setup_size
      #> setup_iterations
      #> setup_no_assms
      #> setup_report
      #> setup_quiet
      #> setup_timeout
      #> setup_finite_types
      #> setup_finite_type_size
   101 
   (* Test parameters kept in the context data: the types used to instantiate
      type variables and the expected outcome of the test. *)
   datatype test_params =
     Test_Params of {default_type : typ list, expect : expectation};

   (* Unpacks test parameters into the pair (default types, expectation). *)
   fun dest_test_params (Test_Params params) = (#default_type params, #expect params);

   (* Packs a pair (default types, expectation) into test parameters. *)
   fun make_test_params (Ts, expectation) =
     Test_Params {default_type = Ts, expect = expectation};

   (* Applies f to the pair representation of the given test parameters. *)
   fun map_test_params' f params = make_test_params (f (dest_test_params params));
   110 
   (* Merges two sets of test parameters: the default type lists are combined
      with Library.merge under equality, the expectations with merge_expectation. *)
   fun merge_test_params (params1, params2) =
     let
       val (Ts1, expect1) = dest_test_params params1;
       val (Ts2, expect2) = dest_test_params params2;
     in
       make_test_params (merge (op =) (Ts1, Ts2), merge_expectation (expect1, expect2))
     end;
   115 
   (* Generic context data of Quickcheck: the association list of named
      generators paired with the current test parameters. *)
   structure Data = Generic_Data
   (
     type T =
       (string * (Proof.context -> term -> int -> term list option * (bool list * bool))) list
         * test_params;

     (* no generators, no default types, no expectation *)
     val empty = ([], Test_Params {default_type = [], expect = No_Expectation});

     val extend = I;

     (* Generators are merged by name; the value comparison passed to AList.merge
        is constantly true, so entries with the same name are never reported as
        conflicting -- which of the two entries survives is decided by
        AList.merge (TODO confirm). *)
     fun merge ((gens1, ps1), (gens2, ps2)) : T =
       (AList.merge (op =) (K true) (gens1, gens2), merge_test_params (ps1, ps2));
   );
   127 
   (* Test parameters stored in a proof context. *)
   fun test_params_of ctxt = snd (Data.get (Context.Proof ctxt));

   (* Default types stored in a proof context. *)
   fun default_type ctxt = fst (dest_test_params (test_params_of ctxt))

   (* Expectation stored in a proof context. *)
   fun expect ctxt = snd (dest_test_params (test_params_of ctxt))

   (* Applies f to the (default types, expectation) pair of a generic context. *)
   fun map_test_params f = Data.map (apsnd (map_test_params' f))

   (* Registers a (name, generator) pair; AList.update is used, so an entry with
      the same name is overwritten. *)
   fun add_generator generator = Data.map (apfst (AList.update (op =) generator));
   137 
   138 (* generating tests *)
   139 
   (* Looks up the generator registered under `name` and applies it to the
      context; the result still expects the term to be tested.
      Raises ERROR if no generator of that name is registered. *)
   fun mk_tester_select name ctxt =
     let
       val generators = fst (Data.get (Context.Proof ctxt));
     in
       (case AList.lookup (op =) generators name of
         SOME generator => generator ctxt
       | NONE => error ("No such quickcheck generator: " ^ name))
     end;
   144 
   (* Builds a tester for term t from every registered generator, silently
      dropping the generators for which `try` reports a failure on t. *)
   fun mk_testers ctxt t =
     let
       val generators = map snd (fst (Data.get (Context.Proof ctxt)));
       fun attempt generator = try (generator ctxt) t;
     in map_filter attempt generators end;
   148 
   (* Builds a tester for term t from every registered generator.

      Generators that fail on t are dropped as long as at least one of them
      succeeds.  If all of them fail, the captured failure of the last generator
      is re-raised via Exn.release.  If no generator is registered at all, an
      ERROR is raised; previously `split_last` was applied to the empty list in
      that case and failed with an uninformative List.Empty. *)
   fun mk_testers_strict ctxt t =
     let
       val generators = map snd (fst (Data.get (Context.Proof ctxt)));
       (* outcome of each generator, captured instead of propagated *)
       val testers =
         map (fn generator => Exn.interruptible_capture (generator ctxt) t) generators;
     in
       if null testers then error "No quickcheck generators available"
       else if forall (is_none o Exn.get_result) testers
       then [(Exn.release o snd o split_last) testers]
       else map_filter Exn.get_result testers
     end;
   158 
   159 
   160 (* testing propositions *)
   161 
   (* Prepares a term for testing: rejects terms that contain type variables
      (schematic or free) or schematic term variables, then abstracts over all
      free variables.  Returns the names of the free variables together with the
      closed term, both based on the same Term.add_frees traversal. *)
   fun prep_test_term t =
     let
       val has_type_vars =
         not (null (Term.add_tvars t [])) orelse not (null (Term.add_tfrees t []));
       val _ =
         if has_type_vars then error "Term to be tested contains type variables" else ();
       val _ =
         if null (Term.add_vars t []) then ()
         else error "Term to be tested contains schematic variables";
       val frees = Term.add_frees t [];
     in (map fst frees, list_abs_free (frees, t)) end
   170 
   (* Runs f () and measures the CPU time it takes.  Returns the result of f
      paired with (description, CPU time in milliseconds).  The outcome of f is
      captured first, so the timer is stopped even if f fails; the failure is
      re-raised afterwards by Exn.release. *)
   fun cpu_time description f =
     let
       val timer = start_timing ()
       val outcome = Exn.capture f ()
       val elapsed = end_timing timer
     in (Exn.release outcome, (description, Time.toMilliseconds (#cpu elapsed))) end
   177 
   (* Tests term t with the generator registered as "small", trying the sizes
      1 up to the value of the `size` option.

      Returns (counterexample, (timings, NONE)): the counterexample, if any, pairs
      the names of the free variables of t with the found terms; `timings` holds
      one (description, milliseconds) entry per tested size, most recent first,
      followed by the compilation time.  No reports are produced.

      Raises ERROR "No such quickcheck generator: small" if that generator is not
      registered (previously an uninformative Option exception escaped from
      `the`).  The unused locals of the earlier version (excipit, empty_report
      and the write-only current_size reference) have been removed. *)
   fun test_term_small ctxt t =
     let
       val (names, t') = prep_test_term t;
       val (tester, comp_time) =
         cpu_time "compilation" (fn () => mk_tester_select "small" ctxt t');
       (* a Match exception during testing counts as "no counterexample at size k" *)
       fun test_at k =
         fst (tester k)
           handle Match =>
             (if Config.get ctxt quiet then ()
              else warning "Exception Match raised during quickcheck";
              NONE);
       fun with_size k timings =
         if k > Config.get ctxt size then (NONE, timings)
         else
           let
             val _ =
               if Config.get ctxt quiet then ()
               else Output.urgent_message ("Test data size: " ^ string_of_int k);
             val (result, timing) =
               cpu_time ("size " ^ string_of_int k) (fn () => test_at k);
           in
             (case result of
               NONE => with_size (k + 1) (timing :: timings)
             | SOME q => (SOME q, timing :: timings))
           end;
       val (cex, timings) = with_size 1 [comp_time];
     in
       (Option.map (curry (op ~~) names) cex, (timings, NONE))
     end
   206 
   (* We assume here that we know the generators and their behaviour: only the
      "SML" and "random" generators are run repeatedly at one size. *)
   fun is_iteratable name = (name = "SML" orelse name = "random")
   211   
   (* Tests term t, either with the generator named by generator_name or, if that
      is NONE, with all registered generators (failing generators are dropped
      silently in quiet mode, see mk_testers / mk_testers_strict).

      The sizes 1 up to the value of the `size` option are tried in turn; at
      every size each tester is applied to (size - 1), repeatedly for up to
      `iterations` runs unless a named generator is not iteratable, in which case
      it runs once.  Testing stops at the first counterexample.

      Returns (counterexample, ([execution time, compilation time], reports))
      where the counterexample pairs the names of the free variables of t with
      the found terms, and reports -- one (size, report list) entry per tested
      size, most recent first -- are only included if the `report` option is set.

      The execution is subject to the `timeout` option; running out of time or
      being interrupted is turned into an ERROR mentioning the current size. *)
   fun test_term ctxt generator_name t =
     let
       val (names, t') = prep_test_term t;

       (* size currently under test, only used for the messages of excipit *)
       val current_size = Unsynchronized.ref 0
       fun excipit what =
         "Quickcheck " ^ what ^ " while testing at size " ^ string_of_int (! current_size)

       fun compile () =
         (case generator_name of
           SOME name => [mk_tester_select name ctxt t']
         | NONE =>
             if Config.get ctxt quiet then mk_testers ctxt t'
             else mk_testers_strict ctxt t')
       val (testers, comp_time) = cpu_time "quickcheck compilation" compile

       val empty_report =
         Report {iterations = 0, raised_match_errors = 0,
           satisfied_assms = [], positive_concl_tests = 0}

       (* one test run; exception Match is recorded instead of propagated *)
       fun run_once f =
         apsnd Run (f ())
           handle Match =>
             (if Config.get ctxt quiet then ()
              else warning "Exception Match raised during quickcheck";
              (NONE, MatchExc))

       (* up to j runs of f, stopping at the first counterexample *)
       fun iterate _ 0 report = (NONE, report)
         | iterate f j report =
             let
               val (test_result, single_report) = run_once f
               val report' = collect_single_report single_report report
             in
               (case test_result of
                 SOME q => (SOME q, report')
               | NONE => iterate f (j - 1) report')
             end

       fun iterations_for NONE = Config.get ctxt iterations
         | iterations_for (SOME name) =
             if is_iteratable name then Config.get ctxt iterations else 1

       (* all testers at size k, collecting one report per tester that was run *)
       fun with_testers _ [] = (NONE, [])
         | with_testers k (tester :: rest) =
             let
               val niterations = iterations_for generator_name
               val (outcome, _) =
                 cpu_time ("size " ^ string_of_int k)
                   (fn () => iterate (fn () => tester (k - 1)) niterations empty_report)
             in
               (case outcome of
                 (SOME q, report) => (SOME q, [report])
               | (NONE, report) => apsnd (cons report) (with_testers k rest))
             end

       fun with_size k reports =
         if k > Config.get ctxt size then (NONE, reports)
         else
           let
             val _ =
               if Config.get ctxt quiet then ()
               else Output.urgent_message ("Test data size: " ^ string_of_int k)
             val _ = current_size := k
             val (result, new_report) = with_testers k testers
             val reports' = (k, new_report) :: reports
           in
             (case result of
               NONE => with_size (k + 1) reports'
             | SOME q => (SOME q, reports'))
           end

       fun execute () =
         cpu_time "quickcheck execution"
           (fn () => apfst (Option.map (fn ts => names ~~ ts)) (with_size 1 []))

       val ((result, reports), exec_time) =
         TimeLimit.timeLimit (seconds (Config.get ctxt timeout)) execute ()
           handle TimeLimit.TimeOut => error (excipit "ran out of time")
             | Exn.Interrupt => error (excipit "was interrupted")  (* FIXME violates Isabelle/ML exception model *)
     in
       (result, ([exec_time, comp_time], if Config.get ctxt report then SOME reports else NONE))
     end;
   268 
   (* The first `finite_type_size` of the five types Enum.finite_1 ...
      Enum.finite_5, each built as a type constructor without arguments. *)
   fun get_finite_types ctxt =
     let
       val finite_names =
         ["Enum.finite_1", "Enum.finite_2", "Enum.finite_3", "Enum.finite_4", "Enum.finite_5"]
       val (selected, _) =
         chop (Config.get ctxt finite_type_size) (map (fn name => Type (name, [])) finite_names)
     in selected end
   273 
   (* Raised by monomorphic_term when an instantiation violates a sort constraint. *)
   exception WELLSORTED of string

   (* Yields a function that replaces every free type variable of a term: by its
      entry in insts if there is one, by default_T otherwise.  Raises WELLSORTED
      with an explanatory message if the replacement type does not have the sort
      of the variable. *)
   fun monomorphic_term thy insts default_T =
     let
       fun sort_error T T' S =
         WELLSORTED ("For instantiation with default_type " ^ Syntax.string_of_typ_global thy default_T ^
           ":\n" ^ Syntax.string_of_typ_global thy T' ^
           " to be substituted for variable " ^
           Syntax.string_of_typ_global thy T ^ " does not have sort " ^
           Syntax.string_of_sort_global thy S)
       fun subst (T as TFree (v, S)) =
             let
               val T' = the_default default_T (AList.lookup (op =) insts v)
             in
               if Sign.of_sort thy (T', S) then T' else raise sort_error T T' S
             end
         | subst T = T;
     in map_types (map_atyps subst) end;
   291 
   (* Result of instantiating one goal with one type: the atomized monomorphic
      term, or the message of the sort error that prevented the instantiation. *)
   datatype wellsorted_error = Wellsorted_Error of string | Term of term

   (* Tests the given goals after instantiating their free type variables.

      Every goal is instantiated with insts and, for the remaining type
      variables, with each candidate type in turn: the Enum.finite_n types if the
      `finite_types` option is set, the default types of the context otherwise.
      Instances that violate a sort constraint are skipped; their messages are
      issued as one warning unless `quiet` is set.  The remaining instances are
      tested with test_term in order until a counterexample is found.

      Returns the counterexample (if any) together with the timing/report results
      of all instances tested so far, in testing order.

      Raises ERROR if no instance is left to test.  Changes over the earlier
      version: no empty warning is issued when there was no sort error, and the
      error raised when there is nothing to test at all (no candidate types or no
      goals) now carries a message instead of the empty string. *)
   fun test_goal_terms lthy (generator_name, insts) check_goals =
     let
       val thy = ProofContext.theory_of lthy
       val inst_types =
         if Config.get lthy finite_types then get_finite_types lthy else default_type lthy
       fun instantiate check_goal T =
         Term ((Object_Logic.atomize_term thy o monomorphic_term thy insts T) check_goal)
           handle WELLSORTED s => Wellsorted_Error s
       val inst_goals =
         maps (fn check_goal => map (instantiate check_goal) inst_types) check_goals
       val error_msg =
         cat_lines (map_filter (fn Term _ => NONE | Wellsorted_Error s => SOME s) inst_goals)
       val correct_inst_goals =
         case map_filter (fn Term t => SOME t | Wellsorted_Error _ => NONE) inst_goals of
           [] =>
             error
               (if error_msg = ""
                then "Quickcheck: no type instantiations or goal instances to test"
                else error_msg)
         | xs => xs
       (* only warn when some instantiation was actually rejected *)
       val _ =
         if Config.get lthy quiet orelse error_msg = "" then () else warning error_msg
       fun collect_results _ reports [] = (NONE, rev reports)
         | collect_results f reports (t :: ts) =
             (case f t of
               (NONE, report) => collect_results f (report :: reports) ts
             | (SOME res, report) => (SOME res, rev (report :: reports)))
     in collect_results (test_term lthy generator_name) [] correct_inst_goals end;
   318 
   (* Tests subgoal i of a proof state.

      The goal to be tested is the subgoal with its outermost "all" quantifiers
      stripped and its parameters substituted, prefixed by the assumptions of the
      context unless the `no_assms` option is set.  If the context has a
      non-empty named target, the assumptions are taken relative to that locale
      and the goal is tested once per registration of the locale returned by
      Locale.registrations_of, transformed by the registration's morphism. *)
   fun test_goal (generator_name, insts) i state =
     let
       val lthy = Proof.context_of state;
       val thy = Proof.theory_of state;
       fun strip (Const ("all", _) $ Abs (_, _, t)) = strip t
         | strip t = t;
       val {goal = st, ...} = Proof.raw_goal state;
       val (gi, frees) = Logic.goal_params (prop_of st) i;
       (* an empty target name is treated like the absence of a target *)
       val some_locale =
         (case Option.map #target (Named_Target.peek lthy) of
           SOME "" => NONE
         | other => other);
       val assms =
         if Config.get lthy no_assms then []
         else
           (case some_locale of
             SOME locale => Assumption.local_assms_of lthy (Locale.init locale thy)
           | NONE => Assumption.all_assms_of lthy);
       val proto_goal =
         Logic.list_implies (map Thm.term_of assms, subst_bounds (frees, strip gi));
       val check_goals =
         (case some_locale of
           NONE => [proto_goal]
         | SOME locale =>
             Locale.registrations_of (Context.Theory thy) (*FIXME*) locale
             |> map (fn (_, phi) => Morphism.term phi proto_goal));
     in
       test_goal_terms lthy (generator_name, insts) check_goals
     end
   342 
   343 (* pretty printing *)
   344 
   (* Name of the tool as shown to the user; the automatic mode is marked by
      the prefix "Auto ". *)
   fun tool_name auto = if auto then "Auto Quickcheck" else "Quickcheck"
   346 
   (* Pretty prints the outcome of a test: either the message that no
      counterexample was found, or a header followed by one "name = term" line
      per variable of the counterexample. *)
   fun pretty_counterex ctxt auto some_cex =
     (case some_cex of
       NONE => Pretty.str (tool_name auto ^ " found no counterexample.")
     | SOME cex =>
         let
           val header = Pretty.str (tool_name auto ^ " found a counterexample:\n");
           fun pretty_binding (s, t) =
             Pretty.block [Pretty.str (s ^ " ="), Pretty.brk 1, Syntax.pretty_term ctxt t];
         in Pretty.chunks (header :: map pretty_binding cex) end);
   352 
   (* Pretty prints one report as a list of "label: number" lines: iterations,
      match exceptions, one line per assumption (numbered from 1) and finally the
      positive conclusion tests. *)
   fun pretty_report
       (Report {iterations, raised_match_errors, satisfied_assms, positive_concl_tests}) =
     let
       fun pretty_stat s i = Pretty.block [Pretty.str (s ^ ": " ^ string_of_int i)]
       fun pretty_assm (i, n) =
         pretty_stat ("satisfied " ^ string_of_int (i + 1) ^ ". assumption") n
       val general =
         [pretty_stat "iterations" iterations,
          pretty_stat "match exceptions" raised_match_errors]
       val assumptions = map_index pretty_assm satisfied_assms
       val conclusion = [pretty_stat "positive conclusion tests" positive_concl_tests]
     in
       general @ assumptions @ conclusion
     end
   364 
   (* Pretty prints the reports of one size.  A single report is printed as is;
      otherwise every report gets a header with the number of its generator,
      counted from 1. *)
   fun pretty_reports' reports =
     (case reports of
       [report] => [Pretty.chunks (pretty_report report)]
     | _ =>
         let
           fun pretty_generator (i, report) =
             Pretty.chunks
               (Pretty.str (string_of_int (i + 1) ^ ". generator:\n") :: pretty_report report)
         in map_index pretty_generator reports end)
   370 
   (* Pretty prints optional reports.  The (size, reports) entries are reversed
      before printing and each is introduced by its size; without reports the
      empty string is printed.  The context argument is not used. *)
   fun pretty_reports ctxt some_reports =
     (case some_reports of
       NONE => Pretty.str ""
     | SOME reports =>
         let
           fun pretty_size (size, reports) =
             Pretty.str ("size " ^ string_of_int size ^ ":") ::
               pretty_reports' reports @ [Pretty.brk 1]
         in
           Pretty.chunks (Pretty.str "Quickcheck report:" :: maps pretty_size (rev reports))
         end)
   377 
   (* Pretty prints the counterexample followed by the reports of every tested
      goal instance; the timing component of each entry is ignored. *)
   fun pretty_counterex_and_reports ctxt auto (cex, timing_and_reports) =
     let
       val all_reports = map snd timing_and_reports
     in
       Pretty.chunks (pretty_counterex ctxt auto cex :: map (pretty_reports ctxt) all_reports)
     end
   381 
   382 (* automatic testing *)
   383 
   (* Automatic testing of the first subgoal, as registered with Auto_Tools.

      Does nothing unless the `auto` flag is set.  Otherwise the goal is tested
      with all generators, with reports disabled and in quiet mode; any failure
      of the test is swallowed by `try`.  Returns (true, state with the
      highlighted counterexample attached as goal message) if a counterexample
      was found, and (false, unchanged state) in every other case. *)
   fun auto_quickcheck state =
     if ! auto then
       let
         val ctxt = Proof.context_of state;
         val silent_state =
           Proof.map_context (Config.put report false #> Config.put quiet true) state;
         fun announce cex =
           Proof.goal_message (K (Pretty.chunks [Pretty.str "",
             Pretty.mark Markup.hilite (pretty_counterex ctxt true cex)])) state;
       in
         (case try (test_goal (NONE, []) 1) silent_state of
           SOME (cex as SOME _, _) => (true, announce cex)
         | _ => (false, state))
       end
     else (false, state)
   401 
   (* Theory setup: registers auto_quickcheck as the automatic tool "quickcheck",
      then sets up the configuration options. *)
   val setup =
     setup_config o Auto_Tools.register_tool ("quickcheck", auto_quickcheck)
   404 
   405 (* Isar commands *)
   406 
   (* Reads a natural number.  Raises ERROR if Library.read_int leaves symbols
      unread or if the number read is negative. *)
   fun read_nat s =
     let
       val (k, rest) = Library.read_int (Symbol.explode s);
     in
       if null rest andalso k >= 0 then k
       else error ("Not a natural number: " ^ s)
     end;
   411 
   (* Reads a Boolean value written as "true" or "false"; raises ERROR for any
      other string. *)
   fun read_bool s =
     (case s of
       "true" => true
     | "false" => false
     | _ => error ("Not a Boolean value: " ^ s))
   415 
   (* Reads a real number with Real.fromString; raises ERROR if that yields NONE. *)
   fun read_real s =
     (case Real.fromString s of
       NONE => error ("Not a real number: " ^ s)
     | SOME r => r)
   420 
   (* Reads an expectation from its lower-case name; raises ERROR for any other
      string. *)
   fun read_expectation s =
     (case s of
       "no_expectation" => No_Expectation
     | "no_counterexample" => No_Counterexample
     | "counterexample" => Counterexample
     | _ => error ("Not an expectation value: " ^ s))
   425 
   (* Turns a (name, arguments) pair into an update of a generic context.

      All parameters except "default_type" take exactly one argument, which is
      read immediately; "default_type" takes any number of type names, which are
      read in the context that is being updated.  Raises ERROR for an unknown
      parameter name, and also for a known name with a wrong number of arguments
      (reported as unknown parameter as well). *)
   fun parse_test_param (name, args) =
     let
       fun put_nat config arg = Config.put_generic config (read_nat arg)
       fun put_bool config arg = Config.put_generic config (read_bool arg)
     in
       (case (name, args) of
         ("size", [arg]) => put_nat size arg
       | ("iterations", [arg]) => put_nat iterations arg
       | ("default_type", type_names) =>
           (fn gen_ctxt =>
             let
               val Ts = map (ProofContext.read_typ (Context.proof_of gen_ctxt)) type_names
             in map_test_params (apfst (K Ts)) gen_ctxt end)
       | ("no_assms", [arg]) => put_bool no_assms arg
       | ("expect", [arg]) => map_test_params (apsnd (K (read_expectation arg)))
       | ("report", [arg]) => put_bool report arg
       | ("quiet", [arg]) => put_bool quiet arg
       | ("timeout", [arg]) => Config.put_generic timeout (read_real arg)
       | ("finite_types", [arg]) => put_bool finite_types arg
       | ("finite_type_size", [arg]) => put_nat finite_type_size arg
       | _ => error ("Unknown test parameter: " ^ name))
     end;
   438 
   (* Processes one argument of the quickcheck command, updating the triple of
      selected generator, type instantiations and context:
        - "generator" with a single argument selects the generator;
        - a name that reads as a free type variable adds an instantiation of that
          variable with the type given as the single argument;
        - anything else is handled as a test parameter of the context. *)
   fun parse_test_param_inst (name, arg) ((generator, insts), ctxt) =
     (case (name, arg) of
       ("generator", [generator_name]) => ((SOME generator_name, insts), ctxt)
     | _ =>
         (case try (ProofContext.read_typ ctxt) name of
           SOME (TFree (v, _)) =>
             let
               val inst = (v, ProofContext.read_typ ctxt (the_single arg))
             in ((generator, AList.update (op =) inst insts), ctxt) end
         | _ =>
             ((generator, insts), Context.proof_map (parse_test_param (name, arg)) ctxt)));
   446 
   (* Theory transformation of the "quickcheck_params" command: applies all given
      test parameters, in order, to the theory context. *)
   fun quickcheck_params_cmd args =
     Context.theory_map (fn context => fold parse_test_param args context);
   448   
   (* Core of the quickcheck command: processes the arguments, tests subgoal i of
      the resulting state and checks the outcome against the expectation stored
      in the context.  Returns the result of test_goal; raises ERROR if a
      counterexample was found although none was expected, or vice versa. *)
   fun gen_quickcheck args i state =
     let
       val ((generator, insts), state') =
         Proof.map_context_result
           (fn ctxt => fold parse_test_param_inst args ((NONE, []), ctxt)) state
       val result as (cex, _) = test_goal (generator, insts) i state'
       val expected = expect (Proof.context_of state')
       val _ =
         (case cex of
           SOME _ =>
             if expected = No_Counterexample then
               error ("quickcheck expected to find no counterexample but found one")
             else ()
         | NONE =>
             if expected = Counterexample then
               error ("quickcheck expected to find a counterexample but did not find one")
             else ())
     in result end
   457 
   (* Programmatic entry point: like gen_quickcheck, but returns only the
      counterexample, if any. *)
   fun quickcheck args i state =
     let
       val (cex, _) = gen_quickcheck args i state
     in cex end;
   459 
   (* Action of the "quickcheck" command: tests subgoal i of the proof state of
      the toplevel and prints the counterexample and the reports. *)
   fun quickcheck_cmd args i state =
     let
       val outcome = gen_quickcheck args i (Toplevel.proof_of state)
       val ctxt = Toplevel.context_of state
     in
       Pretty.writeln (pretty_counterex_and_reports ctxt false outcome)
     end;
   463 
   (* Parser for one argument "name", "name = value" or "name = [v1, ..., vn]".
      A single value is a name or a float number; without "=" the value list
      defaults to ["true"]. *)
   val parse_arg =
     let
       val single_value = (Parse.name || Parse.float_number) >> single;
       val value_list = Parse.$$$ "[" |-- Parse.list1 Parse.name --| Parse.$$$ "]";
     in
       Parse.name -- Scan.optional (Parse.$$$ "=" |-- (single_value || value_list)) ["true"]
     end;

   (* Parser for an optional, bracketed, non-empty list of arguments; yields the
      empty list if there is no such list. *)
   val parse_args =
     Scan.optional (Parse.$$$ "[" |-- Parse.list1 parse_arg --| Parse.$$$ "]") [];
   469 
   (* Isar command "quickcheck_params": sets test parameters in the theory. *)
   val _ =
     Outer_Syntax.command "quickcheck_params" "set parameters for random testing" Keyword.thy_decl
       (parse_args >> (Toplevel.theory o quickcheck_params_cmd));

   (* Isar diagnostic command "quickcheck": tests a subgoal (the first one unless
      a number is given) without changing the toplevel state. *)
   val _ =
     let
       fun run (args, i) = Toplevel.no_timing o Toplevel.keep (quickcheck_cmd args i);
     in
       Outer_Syntax.improper_command "quickcheck" "try to find counterexample for subgoal" Keyword.diag
         (parse_args -- Scan.optional Parse.nat 1 >> run)
     end;
   478 
   479 end;
   480 
   481 
   (* Top-level alias for the flag that switches automatic testing on and off. *)
   val auto_quickcheck : bool Unsynchronized.ref = Quickcheck.auto;