src/HOL/TPTP/TPTP_Parser/tptp_syntax.ML
author sultana
Fri, 09 Mar 2012 15:38:55 +0000
changeset 47715 5d9aab0c609c
child 48215 15e579392a68
permissions -rw-r--r--
added tptp parser;
     1 (*  Title:      HOL/TPTP/TPTP_Parser/tptp_syntax.ML
     2     Author:     Nik Sultana, Cambridge University Computer Laboratory
     3 
     4 TPTP abstract syntax and parser-related definitions.
     5 *)
     6 
     7 signature TPTP_SYNTAX =
     8 sig
     9   exception TPTP_SYNTAX of string
    10   val debug: ('a -> unit) -> 'a -> unit
    11 
    12 (*Note that in THF "^ [X] : ^ [Y] : f @ g" should parse
    13   as "(^ [X] : (^ [Y] : f)) @ g"
    14 *)
    15 
    16   datatype number_kind = Int_num | Real_num | Rat_num
    17 
    18   datatype status_value =
    19       Suc | Unp | Sap | Esa | Sat | Fsa
    20     | Thm | Eqv | Tac | Wec | Eth | Tau
    21     | Wtc | Wth | Cax | Sca | Tca | Wca
    22     | Cup | Csp | Ecs | Csa | Cth | Ceq
    23     | Unc | Wcc | Ect | Fun | Uns | Wuc
    24     | Wct | Scc | Uca | Noc
    25 
    26   type name = string
    27   type atomic_word = string
    28   type inference_rule = atomic_word
    29   type file_info = name option
    30   type single_quoted = string
    31   type file_name = single_quoted
    32   type creator_name = atomic_word
    33   type variable = string
    34   type upper_word = string
    35 
    36   datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
    37   and role =
    38      Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
    39      Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
    40      Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
    41      Role_Type | Role_Unknown
    42 
    43   and general_data = (*Bind of variable * formula_data*)
    44      Atomic_Word of string
    45    | Application of string * general_term list (*general_function*)
    46    | V of upper_word (*variable*)
    47    | Number of number_kind * string
    48    | Distinct_Object of string
    49    | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
    50    | (*formula_data*) Term_Data of tptp_term
    51 
    52   and interpreted_symbol =
    53     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
    54     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
    55     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
    56     (*these should be in defined_pred, but that's not being used in TPTP*)
    57     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
    58     Apply (*this is just a matter of convenience*)
    59 
    60   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
    61     Nor | Nand | Not | Op_Forall | Op_Exists |
    62     (*these should be in defined_pred, but that's not being used in TPTP*)
    63     True | False
    64 
    65   and quantifier = (*interpreted binders*)
    66     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
    67 
    68   and tptp_base_type =
    69     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
    70 
    71   and symbol =
    72       Uninterpreted of string
    73     | Interpreted_ExtraLogic of interpreted_symbol
    74     | Interpreted_Logic of logic_symbol
    75     | TypeSymbol of tptp_base_type
    76     | System of string
    77 
    78   and general_term =
    79       General_Data of general_data (*general_data*)
    80     | General_Term of general_data * general_term (*general_data : general_term*)
    81     | General_List of general_term list
    82 
    83   and tptp_term =
    84       Term_Func of symbol * tptp_term list
    85     | Term_Var of string
    86     | Term_Conditional of tptp_formula * tptp_term * tptp_term
    87     | Term_Num of number_kind * string
    88     | Term_Distinct_Object of string
    89 
    90   and tptp_atom =
    91       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
    92     | THF_Atom_term of tptp_term   (*from here on, only THF*)
    93     | THF_Atom_conn_term of symbol
    94 
    95   and tptp_formula =
    96       Pred of symbol * tptp_term list
    97     | Fmla of symbol * tptp_formula list
    98     | Sequent of tptp_formula list * tptp_formula list
    99     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   100     | Conditional of tptp_formula * tptp_formula * tptp_formula
   101     | Let of tptp_let list * tptp_formula
   102     | Atom of tptp_atom
   103     | THF_type of tptp_type
   104     | THF_typing of tptp_formula * tptp_type (*only THF*)
   105 
   106   and tptp_let =
   107       Let_fmla of (string * tptp_type option) * tptp_formula
   108     | Let_term of (string * tptp_type option) * tptp_term  (*only TFF*)
   109 
   110   and tptp_type =
   111       Prod_type of tptp_type * tptp_type
   112     | Fn_type of tptp_type * tptp_type
   113     | Atom_type of string
   114     | Defined_type of tptp_base_type
   115     | Sum_type of tptp_type * tptp_type (*only THF*)
   116     | Fmla_type of tptp_formula (*only THF*)
   117     | Subtype of symbol * symbol (*only THF*)
   118 
   119   type general_list = general_term list
   120   type parent_details = general_list
   121   type useful_info = general_term list
   122   type info = useful_info
   123 
   124   type annotation = general_term * general_term list
   125 
   126   exception DEQUOTE of string
   127 
   128   type position = string * int * int
   129 
   130   datatype tptp_line =
   131       Annotated_Formula of position * language * string * role * tptp_formula * annotation option
   132    |  Include of string * string list
   133 
   134   type tptp_problem = tptp_line list
   135 
   136   val dequote : single_quoted -> single_quoted
   137 
   138   val role_to_string : role -> string
   139 
   140   val status_to_string : status_value -> string
   141 
   142   val nameof_tff_atom_type : tptp_type -> string
   143 
   144   (*Returns the list of all files included in a directory and its
   145   subdirectories. This is only used for testing the parser/interpreter against
   146   all THF problems.*)
   147   val get_file_list : Path.T -> Path.T list
   148 
   149   val string_of_tptp_term : tptp_term -> string
   150   val string_of_tptp_formula : tptp_formula -> string
   151 
   152 end
   153 
   154 
   155 structure TPTP_Syntax : TPTP_SYNTAX =
   156 struct
   157 
   158 exception TPTP_SYNTAX of string
   159 
   160 datatype number_kind = Int_num | Real_num | Rat_num
   161 
   162 datatype status_value =
   163     Suc | Unp | Sap | Esa | Sat | Fsa
   164   | Thm | Eqv | Tac | Wec | Eth | Tau
   165   | Wtc | Wth | Cax | Sca | Tca | Wca
   166   | Cup | Csp | Ecs | Csa | Cth | Ceq
   167   | Unc | Wcc | Ect | Fun | Uns | Wuc
   168   | Wct | Scc | Uca | Noc
   169 
   170 type name = string
   171 type atomic_word = string
   172 type inference_rule = atomic_word
   173 type file_info = name option
   174 type single_quoted = string
   175 type file_name = single_quoted
   176 type creator_name = atomic_word
   177 type variable = string
   178 type upper_word = string
   179 
   180 datatype language = FOF | CNF | TFF | THF | FOT | TFF_with_arithmetic
   181 and role =
   182   Role_Axiom | Role_Hypothesis | Role_Definition | Role_Assumption |
   183   Role_Lemma | Role_Theorem | Role_Conjecture | Role_Negated_Conjecture |
   184   Role_Plain | Role_Fi_Domain | Role_Fi_Functors | Role_Fi_Predicates |
   185   Role_Type | Role_Unknown
   186 and general_data = (*Bind of variable * formula_data*)
   187     Atomic_Word of string
   188   | Application of string * (general_term list)
   189   | V of upper_word (*variable*)
   190   | Number of number_kind * string
   191   | Distinct_Object of string
   192   | (*formula_data*) Formula_Data of language * tptp_formula (* $thf(<thf_formula>) *)
   193   | (*formula_data*) Term_Data of tptp_term
   194 
   195   and interpreted_symbol =
   196     UMinus | Sum | Difference | Product | Quotient | Quotient_E |
   197     Quotient_T | Quotient_F | Remainder_E | Remainder_T | Remainder_F |
   198     Floor | Ceiling | Truncate | Round | To_Int | To_Rat | To_Real |
   199     (*these should be in defined_pred, but that's not being used in TPTP*)
   200     Less | LessEq | Greater | GreaterEq | EvalEq | Is_Int | Is_Rat |
   201     Apply (*this is just a matter of convenience*)
   202 
   203   and logic_symbol = Equals | NEquals | Or | And | Iff | If | Fi | Xor |
   204     Nor | Nand | Not | Op_Forall | Op_Exists |
   205     (*these should be in defined_pred, but that's not being used in TPTP*)
   206     True | False
   207 
   208   and quantifier = (*interpreted binders*)
   209     Forall | Exists | Epsilon | Iota | Lambda | Dep_Prod | Dep_Sum
   210 
   211   and tptp_base_type =
   212     Type_Ind | Type_Bool | Type_Type | Type_Int | Type_Rat | Type_Real
   213 
   214   and symbol =
   215       Uninterpreted of string
   216     | Interpreted_ExtraLogic of interpreted_symbol
   217     | Interpreted_Logic of logic_symbol
   218     | TypeSymbol of tptp_base_type
   219     | System of string
   220 
   221   and general_term =
   222       General_Data of general_data (*general_data*)
   223     | General_Term of general_data * general_term (*general_data : general_term*)
   224     | General_List of general_term list
   225 
   226   and tptp_term =
   227       Term_Func of symbol * tptp_term list
   228     | Term_Var of string
   229     | Term_Conditional of tptp_formula * tptp_term * tptp_term
   230     | Term_Num of number_kind * string
   231     | Term_Distinct_Object of string
   232 
   233   and tptp_atom =
   234       TFF_Typed_Atom of symbol * tptp_type option (*only TFF*)
   235     | THF_Atom_term of tptp_term   (*from here on, only THF*)
   236     | THF_Atom_conn_term of symbol
   237 
   238   and tptp_formula =
   239       Pred of symbol * tptp_term list
   240     | Fmla of symbol * tptp_formula list
   241     | Sequent of tptp_formula list * tptp_formula list
   242     | Quant of quantifier * (string * tptp_type option) list * tptp_formula
   243     | Conditional of tptp_formula * tptp_formula * tptp_formula
   244     | Let of tptp_let list * tptp_formula
   245     | Atom of tptp_atom
   246     | THF_type of tptp_type
   247     | THF_typing of tptp_formula * tptp_type
   248 
   249   and tptp_let =
   250       Let_fmla of (string * tptp_type option) * tptp_formula (*both TFF and THF*)
   251     | Let_term of (string * tptp_type option) * tptp_term  (*only TFF*)
   252 
   253   and tptp_type =
   254       Prod_type of tptp_type * tptp_type
   255     | Fn_type of tptp_type * tptp_type
   256     | Atom_type of string
   257     | Defined_type of tptp_base_type
   258     | Sum_type of tptp_type * tptp_type (*only THF*)
   259     | Fmla_type of tptp_formula (*only THF*)
   260     | Subtype of symbol * symbol (*only THF*)
   261 
   262 type general_list = general_term list
   263 type parent_details = general_list
   264 type useful_info = general_term list
   265 type info = useful_info
   266 
   267 (*type annotation = (source * info option)*)
   268 type annotation = general_term * general_term list
   269 
   270 exception DEQUOTE of string
   271 
   272 (*
   273 datatype defined_functor =
   274   ITE_T | UMINUS | SUM | DIFFERENCE | PRODUCT | QUOTIENT | QUOTIENT_E |
   275   QUOTIENT_T | QUOTIENT_F | REMAINDER_E | REMAINDER_T | REMAINDER_F |
   276   FLOOR | CEILING | TRUNCATE | ROUND | TO_INT | TO_RAT | TO_REAL
   277 *)
   278 
   279 type position = string * int * int
   280 
   281 datatype tptp_line =
   282     Annotated_Formula of position * language * string * role * tptp_formula * annotation option
   283  |  Include of string * string list
   284 
   285 type tptp_problem = tptp_line list
   286 
   287 fun debug f x = if !Runtime.debug then (f x; ()) else ()
   288 
   289 fun nameof_tff_atom_type (Atom_type str) = str
   290   | nameof_tff_atom_type _ = raise TPTP_SYNTAX "nameof_tff_atom_type called on non-atom type"
   291 
   292 (*Used for debugging. Returns all files contained within a directory or its
   293 subdirectories. Follows symbolic links, filters away directories.*)
   294 fun get_file_list path =
   295   let
   296     fun check_file_entry f rest =
   297       let
   298         (*NOTE needed since no File.is_link and File.read_link*)
   299         val f_str = Path.implode f
   300       in
   301         if File.is_dir f then
   302           rest @ get_file_list f
   303         else if OS.FileSys.isLink f_str then
   304           (*follow links -- NOTE this breaks if links are relative paths*)
   305           check_file_entry (Path.explode (OS.FileSys.readLink f_str)) rest
   306         else f :: rest
   307       end
   308   in
   309     File.read_dir path
   310     |> map
   311         (Path.explode
   312         #> Path.append path)
   313     |> (fn l => fold check_file_entry l [])
   314   end
   315 
   316 fun role_to_string role =
   317   case role of
   318       Role_Axiom => "axiom"
   319     | Role_Hypothesis => "hypothesis"
   320     | Role_Definition => "definition"
   321     | Role_Assumption => "assumption"
   322     | Role_Lemma => "lemma"
   323     | Role_Theorem => "theorem"
   324     | Role_Conjecture => "conjecture"
   325     | Role_Negated_Conjecture => "negated_conjecture"
   326     | Role_Plain => "plain"
   327     | Role_Fi_Domain => "fi_domain"
   328     | Role_Fi_Functors => "fi_functors"
   329     | Role_Fi_Predicates => "fi_predicates"
   330     | Role_Type => "type"
   331     | Role_Unknown => "unknown"
   332 
   333 (*accepts a string "'abc'" and returns "abc"*)
   334 fun dequote str : single_quoted =
   335   if str = "" then
   336     raise (DEQUOTE "empty string")
   337   else
   338     (unprefix "'" str
   339     |> unsuffix "'"
   340     handle (Fail str) =>
   341       if str = "unprefix" then
   342         raise DEQUOTE ("string doesn't open with quote:" ^ str)
   343       else if str = "unsuffix" then
   344         raise DEQUOTE ("string doesn't close with quote:" ^ str)
   345       else raise Fail str)
   346 
   347 
   348 (* Printing parsed TPTP formulas *)
   349 (*FIXME this is not pretty-printing, just printing*)
   350 
   351 fun status_to_string status_value =
   352   case status_value of
   353       Suc => "suc"	| Unp => "unp"
   354     | Sap => "sap"  | Esa => "esa"
   355     | Sat => "sat"  | Fsa => "fsa"
   356     | Thm => "thm"  | Wuc => "wuc"
   357     | Eqv => "eqv"	| Tac => "tac"
   358     | Wec => "wec"	| Eth => "eth"
   359     | Tau => "tau"	| Wtc => "wtc"
   360     | Wth => "wth"	| Cax => "cax"
   361     | Sca => "sca"	| Tca => "tca"
   362     | Wca => "wca"	| Cup => "cup"
   363     | Csp => "csp"	| Ecs => "ecs"
   364     | Csa => "csa"	| Cth => "cth"
   365     | Ceq => "ceq"	| Unc => "unc"
   366     | Wcc => "wcc"	| Ect => "ect"
   367     | Fun => "fun"	| Uns => "uns"
   368     | Wct => "wct"	| Scc => "scc"
   369     | Uca => "uca"	| Noc => "noc"
   370 
   371 fun string_of_tptp_term x =
   372   case x of
   373       Term_Func (symbol, tptp_term_list) =>
   374         "(" ^ string_of_symbol symbol ^ " " ^
   375         String.concatWith " " (map string_of_tptp_term tptp_term_list) ^ ")"
   376     | Term_Var str => str
   377     | Term_Conditional (tptp_formula, tptp_term1, tptp_term2) => "" (*FIXME*)
   378     | Term_Num (_, str) => str
   379     | Term_Distinct_Object str => str
   380 
   381 and string_of_symbol (Uninterpreted str) = str
   382   | string_of_symbol (Interpreted_ExtraLogic interpreted_symbol) = string_of_interpreted_symbol interpreted_symbol
   383   | string_of_symbol (Interpreted_Logic logic_symbol) = string_of_logic_symbol logic_symbol
   384   | string_of_symbol (TypeSymbol tptp_base_type) = string_of_tptp_base_type tptp_base_type
   385   | string_of_symbol (System str) = str
   386 
   387 and string_of_tptp_base_type Type_Ind = "$i"
   388   | string_of_tptp_base_type Type_Bool = "$o"
   389   | string_of_tptp_base_type Type_Type = "$tType"
   390   | string_of_tptp_base_type Type_Int = "$int"
   391   | string_of_tptp_base_type Type_Rat = "$rat"
   392   | string_of_tptp_base_type Type_Real = "$real"
   393 
   394 and string_of_interpreted_symbol x =
   395   case x of
   396       UMinus => "$uminus"
   397     | Sum => "$sum"
   398     | Difference => "$difference"
   399     | Product => "$product"
   400     | Quotient => "$quotient"
   401     | Quotient_E => "$quotient_e"
   402     | Quotient_T => "$quotient_t"
   403     | Quotient_F => "$quotient_f"
   404     | Remainder_E => "$remainder_e"
   405     | Remainder_T => "$remainder_t"
   406     | Remainder_F => "$remainder_f"
   407     | Floor => "$floor"
   408     | Ceiling => "$ceiling"
   409     | Truncate => "$truncate"
   410     | Round => "$round"
   411     | To_Int => "$to_int"
   412     | To_Rat => "$to_rat"
   413     | To_Real => "$to_real"
   414     | Less => "$less"
   415     | LessEq => "$lesseq"
   416     | Greater => "$greater"
   417     | GreaterEq => "$greatereq"
   418     | EvalEq => "$evaleq"
   419     | Is_Int => "$is_int"
   420     | Is_Rat => "$is_rat"
   421     | Apply => "@"
   422 
   423 and string_of_logic_symbol Equals = "="
   424   | string_of_logic_symbol NEquals = "!="
   425   | string_of_logic_symbol Or = "|"
   426   | string_of_logic_symbol And = "&"
   427   | string_of_logic_symbol Iff = "<=>"
   428   | string_of_logic_symbol If = "=>"
   429   | string_of_logic_symbol Fi = "<="
   430   | string_of_logic_symbol Xor = "<~>"
   431   | string_of_logic_symbol Nor = "~|"
   432   | string_of_logic_symbol Nand = "~&"
   433   | string_of_logic_symbol Not = "~"
   434   | string_of_logic_symbol Op_Forall = "!!"
   435   | string_of_logic_symbol Op_Exists = "??"
   436   | string_of_logic_symbol True = "$true"
   437   | string_of_logic_symbol False = "$false"
   438 
   439 and string_of_quantifier Forall = "!"
   440   | string_of_quantifier Exists  = "?"
   441   | string_of_quantifier Epsilon  = "@+"
   442   | string_of_quantifier Iota  = "@-"
   443   | string_of_quantifier Lambda  = "^"
   444   | string_of_quantifier Dep_Prod = "!>"
   445   | string_of_quantifier Dep_Sum  = "?*"
   446 
   447 and string_of_tptp_atom (TFF_Typed_Atom (symbol, tptp_type_option)) =
   448     (case tptp_type_option of
   449        NONE => string_of_symbol symbol
   450      | SOME tptp_type =>
   451          string_of_symbol symbol ^ " : " ^ string_of_tptp_type tptp_type)
   452   | string_of_tptp_atom (THF_Atom_term tptp_term) = string_of_tptp_term tptp_term
   453   | string_of_tptp_atom (THF_Atom_conn_term symbol) = string_of_symbol symbol
   454 
   455 and string_of_tptp_formula (Pred (symbol, tptp_term_list)) =
   456       "(" ^ string_of_symbol symbol ^
   457       String.concatWith " " (map string_of_tptp_term tptp_term_list) ^ ")"
   458   | string_of_tptp_formula (Fmla (symbol, tptp_formula_list)) =
   459       "(" ^
   460       string_of_symbol symbol ^
   461       String.concatWith " " (map string_of_tptp_formula tptp_formula_list) ^ ")"
   462   | string_of_tptp_formula (Sequent (tptp_formula_list1, tptp_formula_list2)) = "" (*FIXME*)
   463   | string_of_tptp_formula (Quant (quantifier, varlist, tptp_formula)) =
   464       string_of_quantifier quantifier ^ "[" ^
   465       String.concatWith ", " (map (fn (n, ty) =>
   466         case ty of
   467           NONE => n
   468         | SOME ty => n ^ " : " ^ string_of_tptp_type ty) varlist) ^ "] : (" ^
   469       string_of_tptp_formula tptp_formula ^ ")"
   470   | string_of_tptp_formula (Conditional _) = "" (*FIXME*)
   471   | string_of_tptp_formula (Let _) = "" (*FIXME*)
   472   | string_of_tptp_formula (Atom tptp_atom) = string_of_tptp_atom tptp_atom
   473   | string_of_tptp_formula (THF_type tptp_type) = string_of_tptp_type tptp_type
   474   | string_of_tptp_formula (THF_typing (tptp_formula, tptp_type)) =
   475       string_of_tptp_formula tptp_formula ^ " : " ^ string_of_tptp_type tptp_type
   476 
   477 and string_of_tptp_type (Prod_type (tptp_type1, tptp_type2)) =
   478       string_of_tptp_type tptp_type1 ^ " * " ^ string_of_tptp_type tptp_type2
   479   | string_of_tptp_type (Fn_type (tptp_type1, tptp_type2)) =
   480       string_of_tptp_type tptp_type1 ^ " > " ^ string_of_tptp_type tptp_type2
   481   | string_of_tptp_type (Atom_type str) = str
   482   | string_of_tptp_type (Defined_type tptp_base_type) =
   483       string_of_tptp_base_type tptp_base_type
   484   | string_of_tptp_type (Sum_type (tptp_type1, tptp_type2)) = ""
   485   | string_of_tptp_type (Fmla_type tptp_formula) = string_of_tptp_formula tptp_formula
   486   | string_of_tptp_type (Subtype (symbol1, symbol2)) =
   487       string_of_symbol symbol1 ^ " << " ^ string_of_symbol symbol2
   488 
   489 end