src/HOL/TPTP/TPTP_Parser/tptp_syntax.ML
author sultana
Wed, 04 Apr 2012 16:29:16 +0100
changeset 48215 15e579392a68
parent 47715 5d9aab0c609c
child 48218 d1ecc9cec531
permissions -rw-r--r--
refactored tptp yacc to bring close to official spec;
     1 (*  Title:      HOL/TPTP/TPTP_Parser/tptp_syntax.ML
     2     Author:     Nik Sultana, Cambridge University Computer Laboratory
     3 
     4 TPTP abstract syntax and parser-related definitions.
     5 *)
     6 
     7 signature TPTP_SYNTAX =
     8 sig
     9   exception TPTP_SYNTAX of string
    10   val debug: ('a -> unit) -> 'a -> unit
    11 
    12 (*Note that in THF "^ [X] : ^ [Y] : f @ g" should parse
    13   as "(^ [X] : (^ [Y] : f)) @ g"
    14 *)
    15 
    16   datatype number_kind = Int_num | Real_num | Rat_num
    17 
    18   datatype status_value =
    19       Suc | Unp | Sap | Esa | Sat | Fsa
    20     | Thm | Eqv | Tac | Wec | Eth | Tau
    21     | Wtc | Wth | Cax | Sca | Tca | Wca
    22     | Cup | Csp | Ecs | Csa | Cth | Ceq
    23     | Unc | Wcc | Ect | Fun | Uns | Wuc
    24     | Wct | Scc | Uca | Noc
    25 
    26   type name = string
    27   type atomic_word = string
    28   type inference_rule = atomic_word
    29   type file_info = name option
    30   type single_quoted = string
    31   type file_name = single_quoted
    32   type creator_name = atomic_word
    33   type variable = string
    34   type upper_word = string
    35 
    36   datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
    37   and role =
    38      Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
    39      Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
    40      Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
    41      Role_Type | Role_Unknown
    42 
    43   and general_data = (*Bind of variable * formula_data*)
    44      Atomic_Word of string
    45    | Application of string * general_term list (*general_function*)
    46    | V of upper_word (*variable*)
    47    | Number of number_kind * string
    48    | Distinct_Object of string
    49    | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
    50    | (*formula_data*) Term_Data of tptp_term
    51 
    52   and interpreted_symbol =
    53     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
    54     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
    55     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
    56     (*these should be in defined_pred, but that's not being used in TPTP*)
    57     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
    58     Distinct |
    59     Apply (*this is just a matter of convenience*)
    60 
    61   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
    62     Nor | Nand | Not | Op_Forall | Op_Exists |
    63     (*these should be in defined_pred, but that's not being used in TPTP*)
    64     True | False
    65 
    66   and quantifier = (*interpreted binders*)
    67     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
    68 
    69   and tptp_base_type =
    70     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
    71 
    72   and symbol =
    73       Uninterpreted of string
    74     | Interpreted_ExtraLogic of interpreted_symbol
    75     | Interpreted_Logic of logic_symbol
    76     | TypeSymbol of tptp_base_type
    77     | System of string
    78 
    79   and general_term =
    80       General_Data of general_data (*general_data*)
    81     | General_Term of general_data * general_term (*general_data : general_term*)
    82     | General_List of general_term list
    83 
    84   and tptp_term =
    85       Term_Func of symbol * tptp_term list
    86     | Term_Var of string
    87     | Term_Conditional of tptp_formula * tptp_term * tptp_term
    88     | Term_Num of number_kind * string
    89     | Term_Distinct_Object of string
    90     | Term_Let of tptp_let list * tptp_term (*FIXME remove list?*)
    91 
    92   and tptp_atom =
    93       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
    94     | THF_Atom_term of tptp_term   (*from here on, only THF*)
    95     | THF_Atom_conn_term of symbol
    96 
    97   and tptp_formula =
    98       Pred of symbol * tptp_term list
    99     | Fmla of symbol * tptp_formula list
   100     | Sequent of tptp_formula list * tptp_formula list
   101     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   102     | Conditional of tptp_formula * tptp_formula * tptp_formula
   103     | Let of tptp_let list * tptp_formula (*FIXME remove list?*)
   104     | Atom of tptp_atom
   105     | THF_type of tptp_type
   106     | THF_typing of tptp_formula * tptp_type (*only THF*)
   107 
   108   and tptp_let =
   109       Let_fmla of (string * tptp_type option) * tptp_formula
   110     | Let_term of (string * tptp_type option) * tptp_term (*only TFF*)
   111 
   112   and tptp_type =
   113       Prod_type of tptp_type * tptp_type
   114     | Fn_type of tptp_type * tptp_type
   115     | Atom_type of string
   116     | Defined_type of tptp_base_type
   117     | Sum_type of tptp_type * tptp_type (*only THF*)
   118     | Fmla_type of tptp_formula
   119     | Subtype of symbol * symbol (*only THF*)
   120 
   121   type general_list = general_term list
   122   type parent_details = general_list
   123   type useful_info = general_term list
   124   type info = useful_info
   125 
   126   type annotation = general_term * general_term list
   127 
   128   exception DEQUOTE of string
   129 
   130   type position = string * int * int
   131 
   132   datatype tptp_line =
   133       Annotated_Formula of position * language * string * role * tptp_formula * annotation option
   134    |  Include of string * string list
   135 
   136   type tptp_problem = tptp_line list
   137 
   138   val dequote : single_quoted -> single_quoted
   139 
   140   val role_to_string : role -> string
   141 
   142   val status_to_string : status_value -> string
   143 
   144   val nameof_tff_atom_type : tptp_type -> string
   145 
   146   (*Returns the list of all files included in a directory and its
   147   subdirectories. This is only used for testing the parser/interpreter against
   148   all THF problems.*)
   149   val get_file_list : Path.T -> Path.T list
   150 
   151   val string_of_tptp_term : tptp_term -> string
   152   val string_of_tptp_formula : tptp_formula -> string
   153 
   154 end
   155 
   156 
   157 structure TPTP_Syntax : TPTP_SYNTAX =
   158 struct
   159 
   160 exception TPTP_SYNTAX of string
   161 
   162 datatype number_kind = Int_num | Real_num | Rat_num
   163 
   164 datatype status_value =
   165     Suc | Unp | Sap | Esa | Sat | Fsa
   166   | Thm | Eqv | Tac | Wec | Eth | Tau
   167   | Wtc | Wth | Cax | Sca | Tca | Wca
   168   | Cup | Csp | Ecs | Csa | Cth | Ceq
   169   | Unc | Wcc | Ect | Fun | Uns | Wuc
   170   | Wct | Scc | Uca | Noc
   171 
   172 type name = string
   173 type atomic_word = string
   174 type inference_rule = atomic_word
   175 type file_info = name option
   176 type single_quoted = string
   177 type file_name = single_quoted
   178 type creator_name = atomic_word
   179 type variable = string
   180 type upper_word = string
   181 
   182 datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
   183 and role =
   184   Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
   185   Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
   186   Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
   187   Role_Type | Role_Unknown
   188 and general_data = (*Bind of variable * formula_data*)
   189     Atomic_Word of string
   190   | Application of string * (general_term list)
   191   | V of upper_word (*variable*)
   192   | Number of number_kind * string
   193   | Distinct_Object of string
   194   | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
   195   | (*formula_data*) Term_Data of tptp_term
   196 
   197   and interpreted_symbol =
   198     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
   199     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
   200     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
   201     (*these should be in defined_pred, but that's not being used in TPTP*)
   202     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
   203     Distinct |
   204     Apply (*this is just a matter of convenience*)
   205 
   206   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
   207     Nor | Nand | Not | Op_Forall | Op_Exists |
   208     (*these should be in defined_pred, but that's not being used in TPTP*)
   209     True | False
   210 
   211   and quantifier = (*interpreted binders*)
   212     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
   213 
   214   and tptp_base_type =
   215     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
   216 
   217   and symbol =
   218       Uninterpreted of string
   219     | Interpreted_ExtraLogic of interpreted_symbol
   220     | Interpreted_Logic of logic_symbol
   221     | TypeSymbol of tptp_base_type
   222     | System of string
   223 
   224   and general_term =
   225       General_Data of general_data (*general_data*)
   226     | General_Term of general_data * general_term (*general_data : general_term*)
   227     | General_List of general_term list
   228 
   229   and tptp_term =
   230       Term_Func of symbol * tptp_term list
   231     | Term_Var of string
   232     | Term_Conditional of tptp_formula * tptp_term * tptp_term
   233     | Term_Num of number_kind * string
   234     | Term_Distinct_Object of string
   235     | Term_Let of tptp_let list * tptp_term (*FIXME remove list?*)
   236 
   237   and tptp_atom =
   238       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
   239     | THF_Atom_term of tptp_term   (*from here on, only THF*)
   240     | THF_Atom_conn_term of symbol
   241 
   242   and tptp_formula =
   243       Pred of symbol * tptp_term list
   244     | Fmla of symbol * tptp_formula list
   245     | Sequent of tptp_formula list * tptp_formula list
   246     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   247     | Conditional of tptp_formula * tptp_formula * tptp_formula
   248     | Let of tptp_let list * tptp_formula
   249     | Atom of tptp_atom
   250     | THF_type of tptp_type
   251     | THF_typing of tptp_formula * tptp_type
   252 
   253   and tptp_let =
   254       Let_fmla of (string * tptp_type option) * tptp_formula (*both TFF and THF*)
   255     | Let_term of (string * tptp_type option) * tptp_term  (*only TFF*)
   256 
   257   and tptp_type =
   258       Prod_type of tptp_type * tptp_type
   259     | Fn_type of tptp_type * tptp_type
   260     | Atom_type of string
   261     | Defined_type of tptp_base_type
   262     | Sum_type of tptp_type * tptp_type (*only THF*)
   263     | Fmla_type of tptp_formula (*only THF*)
   264     | Subtype of symbol * symbol (*only THF*)
   265 
   266 type general_list = general_term list
   267 type parent_details = general_list
   268 type useful_info = general_term list
   269 type info = useful_info
   270 
   271 (*type annotation = (source * info option)*)
   272 type annotation = general_term * general_term list
   273 
   274 exception DEQUOTE of string
   275 
   276 (*
   277 datatype defined_functor =
   278   ITE_T | UMINUS | SUM | DIFFERENCE | PRODUCT | QUOTIENT | QUOTIENT_E |
   279   QUOTIENT_T | QUOTIENT_F | REMAINDER_E | REMAINDER_T | REMAINDER_F |
   280   FLOOR | CEILING | TRUNCATE | ROUND | TO_INT | TO_RAT | TO_REAL
   281 *)
   282 
   283 type position = string * int * int
   284 
   285 datatype tptp_line =
   286     Annotated_Formula of position * language * string * role * tptp_formula * annotation option
   287  |  Include of string * string list
   288 
   289 type tptp_problem = tptp_line list
   290 
   291 fun debug f x = if !Runtime.debug then (f x; ()) else ()
   292 
   293 fun nameof_tff_atom_type (Atom_type str) = str
   294   | nameof_tff_atom_type _ = raise TPTP_SYNTAX "nameof_tff_atom_type called on non-atom type"
   295 
   296 (*Used for debugging. Returns all files contained within a directory or its
   297 subdirectories. Follows symbolic links, filters away directories.*)
   298 fun get_file_list path =
   299   let
   300     fun check_file_entry f rest =
   301       let
   302         (*NOTE needed since no File.is_link and File.read_link*)
   303         val f_str = Path.implode f
   304       in
   305         if File.is_dir f then
   306           rest @ get_file_list f
   307         else if OS.FileSys.isLink f_str then
   308           (*follow links -- NOTE this breaks if links are relative paths*)
   309           check_file_entry (Path.explode (OS.FileSys.readLink f_str)) rest
   310         else f :: rest
   311       end
   312   in
   313     File.read_dir path
   314     |> map
   315         (Path.explode
   316         #> Path.append path)
   317     |> (fn l => fold check_file_entry l [])
   318   end
   319 
   320 fun role_to_string role =
   321   case role of
   322       Role_Axiom => "axiom"
   323     | Role_Hypothesis => "hypothesis"
   324     | Role_Definition => "definition"
   325     | Role_Assumption => "assumption"
   326     | Role_Lemma => "lemma"
   327     | Role_Theorem => "theorem"
   328     | Role_Conjecture => "conjecture"
   329     | Role_Negated_Conjecture => "negated_conjecture"
   330     | Role_Plain => "plain"
   331     | Role_Fi_Domain => "fi_domain"
   332     | Role_Fi_Functors => "fi_functors"
   333     | Role_Fi_Predicates => "fi_predicates"
   334     | Role_Type => "type"
   335     | Role_Unknown => "unknown"
   336 
   337 (*accepts a string "'abc'" and returns "abc"*)
   338 fun dequote str : single_quoted =
   339   if str = "" then
   340     raise (DEQUOTE "empty string")
   341   else
   342     (unprefix "'" str
   343     |> unsuffix "'"
   344     handle (Fail str) =>
   345       if str = "unprefix" then
   346         raise DEQUOTE ("string doesn't open with quote:" ^ str)
   347       else if str = "unsuffix" then
   348         raise DEQUOTE ("string doesn't close with quote:" ^ str)
   349       else raise Fail str)
   350 
   351 
   352 (* Printing parsed TPTP formulas *)
   353 (*FIXME this is not pretty-printing, just printing*)
   354 
   355 fun status_to_string status_value =
   356   case status_value of
   357       Suc => "suc"	| Unp => "unp"
   358     | Sap => "sap"  | Esa => "esa"
   359     | Sat => "sat"  | Fsa => "fsa"
   360     | Thm => "thm"  | Wuc => "wuc"
   361     | Eqv => "eqv"	| Tac => "tac"
   362     | Wec => "wec"	| Eth => "eth"
   363     | Tau => "tau"	| Wtc => "wtc"
   364     | Wth => "wth"	| Cax => "cax"
   365     | Sca => "sca"	| Tca => "tca"
   366     | Wca => "wca"	| Cup => "cup"
   367     | Csp => "csp"	| Ecs => "ecs"
   368     | Csa => "csa"	| Cth => "cth"
   369     | Ceq => "ceq"	| Unc => "unc"
   370     | Wcc => "wcc"	| Ect => "ect"
   371     | Fun => "fun"	| Uns => "uns"
   372     | Wct => "wct"	| Scc => "scc"
   373     | Uca => "uca"	| Noc => "noc"
   374 
   375 fun string_of_tptp_term x =
   376   case x of
   377       Term_Func (symbol, tptp_term_list) =>
   378         "(" ^ string_of_symbol symbol ^ " " ^
   379         String.concatWith " " (map string_of_tptp_term tptp_term_list) ^ ")"
   380     | Term_Var str => str
   381     | Term_Conditional (tptp_formula, tptp_term1, tptp_term2) => "" (*FIXME*)
   382     | Term_Num (_, str) => str
   383     | Term_Distinct_Object str => str
   384 
   385 and string_of_symbol (Uninterpreted str) = str
   386   | string_of_symbol (Interpreted_ExtraLogic interpreted_symbol) = string_of_interpreted_symbol interpreted_symbol
   387   | string_of_symbol (Interpreted_Logic logic_symbol) = string_of_logic_symbol logic_symbol
   388   | string_of_symbol (TypeSymbol tptp_base_type) = string_of_tptp_base_type tptp_base_type
   389   | string_of_symbol (System str) = str
   390 
   391 and string_of_tptp_base_type Type_Ind = "$i"
   392   | string_of_tptp_base_type Type_Bool = "$o"
   393   | string_of_tptp_base_type Type_Type = "$tType"
   394   | string_of_tptp_base_type Type_Int = "$int"
   395   | string_of_tptp_base_type Type_Rat = "$rat"
   396   | string_of_tptp_base_type Type_Real = "$real"
   397 
   398 and string_of_interpreted_symbol x =
   399   case x of
   400       UMinus => "$uminus"
   401     | Sum => "$sum"
   402     | Difference => "$difference"
   403     | Product => "$product"
   404     | Quotient => "$quotient"
   405     | Quotient_E => "$quotient_e"
   406     | Quotient_T => "$quotient_t"
   407     | Quotient_F => "$quotient_f"
   408     | Remainder_E => "$remainder_e"
   409     | Remainder_T => "$remainder_t"
   410     | Remainder_F => "$remainder_f"
   411     | Floor => "$floor"
   412     | Ceiling => "$ceiling"
   413     | Truncate => "$truncate"
   414     | Round => "$round"
   415     | To_Int => "$to_int"
   416     | To_Rat => "$to_rat"
   417     | To_Real => "$to_real"
   418     | Less => "$less"
   419     | LessEq => "$lesseq"
   420     | Greater => "$greater"
   421     | GreaterEq => "$greatereq"
   422     | EvalEq => "$evaleq"
   423     | Is_Int => "$is_int"
   424     | Is_Rat => "$is_rat"
   425     | Apply => "@"
   426 
   427 and string_of_logic_symbol Equals = "="
   428   | string_of_logic_symbol NEquals = "!="
   429   | string_of_logic_symbol Or = "|"
   430   | string_of_logic_symbol And = "&"
   431   | string_of_logic_symbol Iff = "<=>"
   432   | string_of_logic_symbol If = "=>"
   433   | string_of_logic_symbol Fi = "<="
   434   | string_of_logic_symbol Xor = "<~>"
   435   | string_of_logic_symbol Nor = "~|"
   436   | string_of_logic_symbol Nand = "~&"
   437   | string_of_logic_symbol Not = "~"
   438   | string_of_logic_symbol Op_Forall = "!!"
   439   | string_of_logic_symbol Op_Exists = "??"
   440   | string_of_logic_symbol True = "$true"
   441   | string_of_logic_symbol False = "$false"
   442 
   443 and string_of_quantifier Forall = "!"
   444   | string_of_quantifier Exists  = "?"
   445   | string_of_quantifier Epsilon  = "@+"
   446   | string_of_quantifier Iota  = "@-"
   447   | string_of_quantifier Lambda  = "^"
   448   | string_of_quantifier Dep_Prod = "!>"
   449   | string_of_quantifier Dep_Sum  = "?*"
   450 
   451 and string_of_tptp_atom (TFF_Typed_Atom (symbol, tptp_type_option)) =
   452     (case tptp_type_option of
   453        NONE => string_of_symbol symbol
   454      | SOME tptp_type =>
   455          string_of_symbol symbol ^ " : " ^ string_of_tptp_type tptp_type)
   456   | string_of_tptp_atom (THF_Atom_term tptp_term) = string_of_tptp_term tptp_term
   457   | string_of_tptp_atom (THF_Atom_conn_term symbol) = string_of_symbol symbol
   458 
   459 and string_of_tptp_formula (Pred (symbol, tptp_term_list)) =
   460       "(" ^ string_of_symbol symbol ^
   461       String.concatWith " " (map string_of_tptp_term tptp_term_list) ^ ")"
   462   | string_of_tptp_formula (Fmla (symbol, tptp_formula_list)) =
   463       "(" ^
   464       string_of_symbol symbol ^
   465       String.concatWith " " (map string_of_tptp_formula tptp_formula_list) ^ ")"
   466   | string_of_tptp_formula (Sequent (tptp_formula_list1, tptp_formula_list2)) = "" (*FIXME*)
   467   | string_of_tptp_formula (Quant (quantifier, varlist, tptp_formula)) =
   468       string_of_quantifier quantifier ^ "[" ^
   469       String.concatWith ", " (map (fn (n, ty) =>
   470         case ty of
   471           NONE => n
   472         | SOME ty => n ^ " : " ^ string_of_tptp_type ty) varlist) ^ "] : (" ^
   473       string_of_tptp_formula tptp_formula ^ ")"
   474   | string_of_tptp_formula (Conditional _) = "" (*FIXME*)
   475   | string_of_tptp_formula (Let _) = "" (*FIXME*)
   476   | string_of_tptp_formula (Atom tptp_atom) = string_of_tptp_atom tptp_atom
   477   | string_of_tptp_formula (THF_type tptp_type) = string_of_tptp_type tptp_type
   478   | string_of_tptp_formula (THF_typing (tptp_formula, tptp_type)) =
   479       string_of_tptp_formula tptp_formula ^ " : " ^ string_of_tptp_type tptp_type
   480 
   481 and string_of_tptp_type (Prod_type (tptp_type1, tptp_type2)) =
   482       string_of_tptp_type tptp_type1 ^ " * " ^ string_of_tptp_type tptp_type2
   483   | string_of_tptp_type (Fn_type (tptp_type1, tptp_type2)) =
   484       string_of_tptp_type tptp_type1 ^ " > " ^ string_of_tptp_type tptp_type2
   485   | string_of_tptp_type (Atom_type str) = str
   486   | string_of_tptp_type (Defined_type tptp_base_type) =
   487       string_of_tptp_base_type tptp_base_type
   488   | string_of_tptp_type (Sum_type (tptp_type1, tptp_type2)) = ""
   489   | string_of_tptp_type (Fmla_type tptp_formula) = string_of_tptp_formula tptp_formula
   490   | string_of_tptp_type (Subtype (symbol1, symbol2)) =
   491       string_of_symbol symbol1 ^ " << " ^ string_of_symbol symbol2
   492 
   493 end