src/HOL/TPTP/TPTP_Parser/tptp_syntax.ML
author wenzelm
Wed, 11 Apr 2012 20:42:28 +0200
changeset 48297 26c1a97c7784
parent 48218 d1ecc9cec531
child 48325 479b4d6b9562
permissions -rw-r--r--
standardized ML aliases;
     1 (*  Title:      HOL/TPTP/TPTP_Parser/tptp_syntax.ML
     2     Author:     Nik Sultana, Cambridge University Computer Laboratory
     3 
     4 TPTP abstract syntax and parser-related definitions.
     5 *)
     6 
     7 signature TPTP_SYNTAX =
     8 sig
     9   exception TPTP_SYNTAX of string
    10   val debug: ('a -> unit) -> 'a -> unit
    11 
    12 (*Note that in THF "^ [X] : ^ [Y] : f @ g" should parse
    13   as "(^ [X] : (^ [Y] : f)) @ g"
    14 *)
    15 
    16   datatype number_kind = Int_num | Real_num | Rat_num
    17 
    18   datatype status_value =
    19       Suc | Unp | Sap | Esa | Sat | Fsa
    20     | Thm | Eqv | Tac | Wec | Eth | Tau
    21     | Wtc | Wth | Cax | Sca | Tca | Wca
    22     | Cup | Csp | Ecs | Csa | Cth | Ceq
    23     | Unc | Wcc | Ect | Fun | Uns | Wuc
    24     | Wct | Scc | Uca | Noc
    25 
    26   type name = string
    27   type atomic_word = string
    28   type inference_rule = atomic_word
    29   type file_info = name option
    30   type single_quoted = string
    31   type file_name = single_quoted
    32   type creator_name = atomic_word
    33   type variable = string
    34   type upper_word = string
    35 
    36   datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
    37   and role =
    38      Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
    39      Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
    40      Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
    41      Role_Type | Role_Unknown
    42 
    43   and general_data = (*Bind of variable * formula_data*)
    44      Atomic_Word of string
    45    | Application of string * general_term list (*general_function*)
    46    | V of upper_word (*variable*)
    47    | Number of number_kind * string
    48    | Distinct_Object of string
    49    | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
    50    | (*formula_data*) Term_Data of tptp_term
    51 
    52   and interpreted_symbol =
    53     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
    54     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
    55     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
    56     (*FIXME these should be in defined_pred, but that's not being used in TPTP*)
    57     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
    58     Distinct | Apply
    59 
    60   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
    61     Nor | Nand | Not | Op_Forall | Op_Exists |
    62     (*FIXME these should be in defined_pred, but that's not being used in TPTP*)
    63     True | False
    64 
    65   and quantifier = (*interpreted binders*)
    66     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
    67 
    68   and tptp_base_type =
    69     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
    70 
    71   and symbol =
    72       Uninterpreted of string
    73     | Interpreted_ExtraLogic of interpreted_symbol
    74     | Interpreted_Logic of logic_symbol
    75     | TypeSymbol of tptp_base_type
    76     | System of string
    77 
    78   and general_term =
    79       General_Data of general_data (*general_data*)
    80     | General_Term of general_data * general_term (*general_data : general_term*)
    81     | General_List of general_term list
    82 
    83   and tptp_term =
    84       Term_Func of symbol * tptp_term list
    85     | Term_Var of string
    86     | Term_Conditional of tptp_formula * tptp_term * tptp_term
    87     | Term_Num of number_kind * string
    88     | Term_Distinct_Object of string
    89     | Term_Let of tptp_let list * tptp_term (*FIXME remove list?*)
    90 
    91   and tptp_atom =
    92       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
    93     | THF_Atom_term of tptp_term   (*from here on, only THF*)
    94     | THF_Atom_conn_term of symbol
    95 
    96   and tptp_formula =
    97       Pred of symbol * tptp_term list
    98     | Fmla of symbol * tptp_formula list
    99     | Sequent of tptp_formula list * tptp_formula list
   100     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   101     | Conditional of tptp_formula * tptp_formula * tptp_formula
   102     | Let of tptp_let list * tptp_formula (*FIXME remove list?*)
   103     | Atom of tptp_atom
   104     | Type_fmla of tptp_type
   105     | THF_typing of tptp_formula * tptp_type (*only THF*)
   106 
   107   and tptp_let =
   108       Let_fmla of (string * tptp_type option) * tptp_formula
   109     | Let_term of (string * tptp_type option) * tptp_term (*only TFF*)
   110 
   111   and tptp_type =
   112       Prod_type of tptp_type * tptp_type
   113     | Fn_type of tptp_type * tptp_type
   114     | Atom_type of string
   115     | Defined_type of tptp_base_type
   116     | Sum_type of tptp_type * tptp_type (*only THF*)
   117     | Fmla_type of tptp_formula
   118     | Subtype of symbol * symbol (*only THF*)
   119 
   120   type general_list = general_term list
   121   type parent_details = general_list
   122   type useful_info = general_term list
   123   type info = useful_info
   124 
   125   type annotation = general_term * general_term list
   126 
   127   exception DEQUOTE of string
   128 
   129   type position = string * int * int
   130 
   131   datatype tptp_line =
   132       Annotated_Formula of position * language * string * role *
   133         tptp_formula * annotation option
   134    |  Include of string * string list
   135 
   136   type tptp_problem = tptp_line list
   137 
   138   val dequote : single_quoted -> single_quoted
   139 
   140   val role_to_string : role -> string
   141 
   142   val status_to_string : status_value -> string
   143 
   144   val nameof_tff_atom_type : tptp_type -> string
   145 
   146   (*Returns the list of all files included in a directory and its
   147   subdirectories. This is only used for testing the parser/interpreter against
   148   all THF problems.*)
   149   val get_file_list : Path.T -> Path.T list
   150 
   151   val string_of_tptp_term : tptp_term -> string
   152   val string_of_tptp_formula : tptp_formula -> string
   153 
   154 end
   155 
   156 
   157 structure TPTP_Syntax : TPTP_SYNTAX =
   158 struct
   159 
   160 exception TPTP_SYNTAX of string
   161 
   162 datatype number_kind = Int_num | Real_num | Rat_num
   163 
   164 datatype status_value =
   165     Suc | Unp | Sap | Esa | Sat | Fsa
   166   | Thm | Eqv | Tac | Wec | Eth | Tau
   167   | Wtc | Wth | Cax | Sca | Tca | Wca
   168   | Cup | Csp | Ecs | Csa | Cth | Ceq
   169   | Unc | Wcc | Ect | Fun | Uns | Wuc
   170   | Wct | Scc | Uca | Noc
   171 
   172 type name = string
   173 type atomic_word = string
   174 type inference_rule = atomic_word
   175 type file_info = name option
   176 type single_quoted = string
   177 type file_name = single_quoted
   178 type creator_name = atomic_word
   179 type variable = string
   180 type upper_word = string
   181 
   182 datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
   183 and role =
   184   Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
   185   Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
   186   Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
   187   Role_Type | Role_Unknown
   188 and general_data = (*Bind of variable * formula_data*)
   189     Atomic_Word of string
   190   | Application of string * (general_term list)
   191   | V of upper_word (*variable*)
   192   | Number of number_kind * string
   193   | Distinct_Object of string
   194   | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
   195   | (*formula_data*) Term_Data of tptp_term
   196 
   197   and interpreted_symbol =
   198     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
   199     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
   200     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
   201     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
   202     Distinct |
   203     Apply
   204 
   205   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
   206     Nor | Nand | Not | Op_Forall | Op_Exists |
   207     True | False
   208 
   209   and quantifier = (*interpreted binders*)
   210     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
   211 
   212   and tptp_base_type =
   213     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
   214 
   215   and symbol =
   216       Uninterpreted of string
   217     | Interpreted_ExtraLogic of interpreted_symbol
   218     | Interpreted_Logic of logic_symbol
   219     | TypeSymbol of tptp_base_type
   220     | System of string
   221 
   222   and general_term =
   223       General_Data of general_data (*general_data*)
   224     | General_Term of general_data * general_term (*general_data : general_term*)
   225     | General_List of general_term list
   226 
   227   and tptp_term =
   228       Term_Func of symbol * tptp_term list
   229     | Term_Var of string
   230     | Term_Conditional of tptp_formula * tptp_term * tptp_term
   231     | Term_Num of number_kind * string
   232     | Term_Distinct_Object of string
   233     | Term_Let of tptp_let list * tptp_term (*FIXME remove list?*)
   234 
   235   and tptp_atom =
   236       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
   237     | THF_Atom_term of tptp_term   (*from here on, only THF*)
   238     | THF_Atom_conn_term of symbol
   239 
   240   and tptp_formula =
   241       Pred of symbol * tptp_term list
   242     | Fmla of symbol * tptp_formula list
   243     | Sequent of tptp_formula list * tptp_formula list
   244     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   245     | Conditional of tptp_formula * tptp_formula * tptp_formula
   246     | Let of tptp_let list * tptp_formula
   247     | Atom of tptp_atom
   248     | Type_fmla of tptp_type
   249     | THF_typing of tptp_formula * tptp_type
   250 
   251   and tptp_let =
   252       Let_fmla of (string * tptp_type option) * tptp_formula
   253     | Let_term of (string * tptp_type option) * tptp_term
   254 
   255   and tptp_type =
   256       Prod_type of tptp_type * tptp_type
   257     | Fn_type of tptp_type * tptp_type
   258     | Atom_type of string
   259     | Defined_type of tptp_base_type
   260     | Sum_type of tptp_type * tptp_type
   261     | Fmla_type of tptp_formula
   262     | Subtype of symbol * symbol
   263 
   264 type general_list = general_term list
   265 type parent_details = general_list
   266 type useful_info = general_term list
   267 type info = useful_info
   268 
   269 (*type annotation = (source * info option)*)
   270 type annotation = general_term * general_term list
   271 
   272 exception DEQUOTE of string
   273 
   274 type position = string * int * int
   275 
   276 datatype tptp_line =
   277     Annotated_Formula of position * language * string * role * tptp_formula * annotation option
   278  |  Include of string * string list
   279 
   280 type tptp_problem = tptp_line list
   281 
   282 fun debug f x = if !Runtime.debug then (f x; ()) else ()
   283 
   284 fun nameof_tff_atom_type (Atom_type str) = str
   285   | nameof_tff_atom_type _ = raise TPTP_SYNTAX "nameof_tff_atom_type called on non-atom type"
   286 
   287 (*Used for debugging. Returns all files contained within a directory or its
   288 subdirectories. Follows symbolic links, filters away directories.*)
   289 fun get_file_list path =
   290   let
   291     fun check_file_entry f rest =
   292       let
   293         (*NOTE needed since no File.is_link and File.read_link*)
   294         val f_str = Path.implode f
   295       in
   296         if File.is_dir f then
   297           rest @ get_file_list f
   298         else if OS.FileSys.isLink f_str then
   299           (*follow links -- NOTE this breaks if links are relative paths*)
   300           check_file_entry (Path.explode (OS.FileSys.readLink f_str)) rest
   301         else f :: rest
   302       end
   303   in
   304     File.read_dir path
   305     |> map
   306         (Path.explode
   307         #> Path.append path)
   308     |> (fn l => fold check_file_entry l [])
   309   end
   310 
   311 fun role_to_string role =
   312   case role of
   313       Role_Axiom => "axiom"
   314     | Role_Hypothesis => "hypothesis"
   315     | Role_Definition => "definition"
   316     | Role_Assumption => "assumption"
   317     | Role_Lemma => "lemma"
   318     | Role_Theorem => "theorem"
   319     | Role_Conjecture => "conjecture"
   320     | Role_Negated_Conjecture => "negated_conjecture"
   321     | Role_Plain => "plain"
   322     | Role_Fi_Domain => "fi_domain"
   323     | Role_Fi_Functors => "fi_functors"
   324     | Role_Fi_Predicates => "fi_predicates"
   325     | Role_Type => "type"
   326     | Role_Unknown => "unknown"
   327 
   328 (*accepts a string "'abc'" and returns "abc"*)
   329 fun dequote str : single_quoted =
   330   if str = "" then
   331     raise (DEQUOTE "empty string")
   332   else
   333     (unprefix "'" str
   334     |> unsuffix "'"
   335     handle (Fail str) =>
   336       if str = "unprefix" then
   337         raise DEQUOTE ("string doesn't open with quote:" ^ str)
   338       else if str = "unsuffix" then
   339         raise DEQUOTE ("string doesn't close with quote:" ^ str)
   340       else raise Fail str)
   341 
   342 
   343 (* Printing parsed TPTP formulas *)
   344 (*FIXME this is not pretty-printing, just printing*)
   345 
   346 fun status_to_string status_value =
   347   case status_value of
   348       Suc => "suc"	| Unp => "unp"
   349     | Sap => "sap"  | Esa => "esa"
   350     | Sat => "sat"  | Fsa => "fsa"
   351     | Thm => "thm"  | Wuc => "wuc"
   352     | Eqv => "eqv"	| Tac => "tac"
   353     | Wec => "wec"	| Eth => "eth"
   354     | Tau => "tau"	| Wtc => "wtc"
   355     | Wth => "wth"	| Cax => "cax"
   356     | Sca => "sca"	| Tca => "tca"
   357     | Wca => "wca"	| Cup => "cup"
   358     | Csp => "csp"	| Ecs => "ecs"
   359     | Csa => "csa"	| Cth => "cth"
   360     | Ceq => "ceq"	| Unc => "unc"
   361     | Wcc => "wcc"	| Ect => "ect"
   362     | Fun => "fun"	| Uns => "uns"
   363     | Wct => "wct"	| Scc => "scc"
   364     | Uca => "uca"	| Noc => "noc"
   365 
   366 fun string_of_tptp_term x =
   367   case x of
   368       Term_Func (symbol, tptp_term_list) =>
   369         "(" ^ string_of_symbol symbol ^ " " ^
   370         space_implode " " (map string_of_tptp_term tptp_term_list) ^ ")"
   371     | Term_Var str => str
   372     | Term_Conditional (tptp_formula, tptp_term1, tptp_term2) => "" (*FIXME*)
   373     | Term_Num (_, str) => str
   374     | Term_Distinct_Object str => str
   375 
   376 and string_of_symbol (Uninterpreted str) = str
   377   | string_of_symbol (Interpreted_ExtraLogic interpreted_symbol) = string_of_interpreted_symbol interpreted_symbol
   378   | string_of_symbol (Interpreted_Logic logic_symbol) = string_of_logic_symbol logic_symbol
   379   | string_of_symbol (TypeSymbol tptp_base_type) = string_of_tptp_base_type tptp_base_type
   380   | string_of_symbol (System str) = str
   381 
   382 and string_of_tptp_base_type Type_Ind = "$i"
   383   | string_of_tptp_base_type Type_Bool = "$o"
   384   | string_of_tptp_base_type Type_Type = "$tType"
   385   | string_of_tptp_base_type Type_Int = "$int"
   386   | string_of_tptp_base_type Type_Rat = "$rat"
   387   | string_of_tptp_base_type Type_Real = "$real"
   388 
   389 and string_of_interpreted_symbol x =
   390   case x of
   391       UMinus => "$uminus"
   392     | Sum => "$sum"
   393     | Difference => "$difference"
   394     | Product => "$product"
   395     | Quotient => "$quotient"
   396     | Quotient_E => "$quotient_e"
   397     | Quotient_T => "$quotient_t"
   398     | Quotient_F => "$quotient_f"
   399     | Remainder_E => "$remainder_e"
   400     | Remainder_T => "$remainder_t"
   401     | Remainder_F => "$remainder_f"
   402     | Floor => "$floor"
   403     | Ceiling => "$ceiling"
   404     | Truncate => "$truncate"
   405     | Round => "$round"
   406     | To_Int => "$to_int"
   407     | To_Rat => "$to_rat"
   408     | To_Real => "$to_real"
   409     | Less => "$less"
   410     | LessEq => "$lesseq"
   411     | Greater => "$greater"
   412     | GreaterEq => "$greatereq"
   413     | EvalEq => "$evaleq"
   414     | Is_Int => "$is_int"
   415     | Is_Rat => "$is_rat"
   416     | Apply => "@"
   417 
   418 and string_of_logic_symbol Equals = "="
   419   | string_of_logic_symbol NEquals = "!="
   420   | string_of_logic_symbol Or = "|"
   421   | string_of_logic_symbol And = "&"
   422   | string_of_logic_symbol Iff = "<=>"
   423   | string_of_logic_symbol If = "=>"
   424   | string_of_logic_symbol Fi = "<="
   425   | string_of_logic_symbol Xor = "<~>"
   426   | string_of_logic_symbol Nor = "~|"
   427   | string_of_logic_symbol Nand = "~&"
   428   | string_of_logic_symbol Not = "~"
   429   | string_of_logic_symbol Op_Forall = "!!"
   430   | string_of_logic_symbol Op_Exists = "??"
   431   | string_of_logic_symbol True = "$true"
   432   | string_of_logic_symbol False = "$false"
   433 
   434 and string_of_quantifier Forall = "!"
   435   | string_of_quantifier Exists  = "?"
   436   | string_of_quantifier Epsilon  = "@+"
   437   | string_of_quantifier Iota  = "@-"
   438   | string_of_quantifier Lambda  = "^"
   439   | string_of_quantifier Dep_Prod = "!>"
   440   | string_of_quantifier Dep_Sum  = "?*"
   441 
   442 and string_of_tptp_atom (TFF_Typed_Atom (symbol, tptp_type_option)) =
   443     (case tptp_type_option of
   444        NONE => string_of_symbol symbol
   445      | SOME tptp_type =>
   446          string_of_symbol symbol ^ " : " ^ string_of_tptp_type tptp_type)
   447   | string_of_tptp_atom (THF_Atom_term tptp_term) = string_of_tptp_term tptp_term
   448   | string_of_tptp_atom (THF_Atom_conn_term symbol) = string_of_symbol symbol
   449 
   450 and string_of_tptp_formula (Pred (symbol, tptp_term_list)) =
   451       "(" ^ string_of_symbol symbol ^
   452       space_implode " " (map string_of_tptp_term tptp_term_list) ^ ")"
   453   | string_of_tptp_formula (Fmla (symbol, tptp_formula_list)) =
   454       "(" ^
   455       string_of_symbol symbol ^
   456       space_implode " " (map string_of_tptp_formula tptp_formula_list) ^ ")"
   457   | string_of_tptp_formula (Sequent (tptp_formula_list1, tptp_formula_list2)) = "" (*FIXME*)
   458   | string_of_tptp_formula (Quant (quantifier, varlist, tptp_formula)) =
   459       string_of_quantifier quantifier ^ "[" ^
   460       space_implode ", " (map (fn (n, ty) =>
   461         case ty of
   462           NONE => n
   463         | SOME ty => n ^ " : " ^ string_of_tptp_type ty) varlist) ^ "] : (" ^
   464       string_of_tptp_formula tptp_formula ^ ")"
   465   | string_of_tptp_formula (Conditional _) = "" (*FIXME*)
   466   | string_of_tptp_formula (Let _) = "" (*FIXME*)
   467   | string_of_tptp_formula (Atom tptp_atom) = string_of_tptp_atom tptp_atom
   468   | string_of_tptp_formula (Type_fmla tptp_type) = string_of_tptp_type tptp_type
   469   | string_of_tptp_formula (THF_typing (tptp_formula, tptp_type)) =
   470       string_of_tptp_formula tptp_formula ^ " : " ^ string_of_tptp_type tptp_type
   471 
   472 and string_of_tptp_type (Prod_type (tptp_type1, tptp_type2)) =
   473       string_of_tptp_type tptp_type1 ^ " * " ^ string_of_tptp_type tptp_type2
   474   | string_of_tptp_type (Fn_type (tptp_type1, tptp_type2)) =
   475       string_of_tptp_type tptp_type1 ^ " > " ^ string_of_tptp_type tptp_type2
   476   | string_of_tptp_type (Atom_type str) = str
   477   | string_of_tptp_type (Defined_type tptp_base_type) =
   478       string_of_tptp_base_type tptp_base_type
   479   | string_of_tptp_type (Sum_type (tptp_type1, tptp_type2)) = ""
   480   | string_of_tptp_type (Fmla_type tptp_formula) = string_of_tptp_formula tptp_formula
   481   | string_of_tptp_type (Subtype (symbol1, symbol2)) =
   482       string_of_symbol symbol1 ^ " << " ^ string_of_symbol symbol2
   483 
   484 end