Files
deepmind-research/satore/tests/saturation.rkt
T
Jonathan Schwarz ca532c106c Crediting Alex Matthews as a code contributor
PiperOrigin-RevId: 360859332
2021-03-09 09:49:56 +00:00

349 lines
13 KiB
Racket

#lang racket/base
(require (for-syntax syntax/parse
racket/base)
define2
define2/define-wrapper
global
rackunit
racket/dict
racket/pretty
syntax/parse/define
"../clause.rkt"
"../misc.rkt" ; for easy access to *debug-level*
"../rewrite-tree.rkt"
(prefix-in sat: "../saturation.rkt")
"../unification.rkt")
(define-global *cpu-limit* 10
"Time limit in seconds for tests"
number?
string->number)
(define (Vars+clausify-list l)
(map clausify
(symbol-variables->Vars l)))
(define current-saturation-args #false)
(define-syntax (for-in-list* stx)
(syntax-parse stx
[(_ ([var x ...] ... clauses ...) body ...)
#:with (name ...) (generate-temporaries #'(var ...))
#'(for ((~@ [var (in-list (list x ...))]
[name (in-list '(x ...))]
#:when #true)
...)
(set! current-saturation-args (list (cons 'var name) ...))
body ...)]))
;; Print additional information
(define old-check-handler (current-check-handler))
(current-check-handler
(λ (e)
(eprintf (pretty-format current-saturation-args))
(eprintf "\n")
(old-check-handler e)))
;; USE THIS FOR DEBUGGING
(define-simple-macro (replay-on-failure body ...)
(let ([old-check-handler (current-check-handler)])
(parameterize ([current-check-handler
(λ (e)
(old-check-handler e)
(eprintf
"Some checks have failed. Replaying in interactive mode for debugging.\n")
(*debug-level* 3)
(*cpu-limit* +inf.0)
(let () body ...))])
; encapsulated to avoid collisions
(let () body ...))))
(for-in-list* ([l-res-pruning? #true #false]
[neg-lit-select? #true #false]
[atom<=> KBO1lex<=> atom1<=>]
[dynamic-ok? #true #false]
[rwtree-in=out? #true #false] ; false means no search for new rules
;#:unless (and l-res-pruning? neg-lit-select?) ; can't have both at the same time
)
(define-wrapper (saturation
(sat:saturation input-clauses
#:? [step-limit 200]
#:? [memory-limit 4096] ; in MB
#:? [cpu-limit (*cpu-limit*)] ; in seconds
#:? [rwtree (make-rewrite-tree #:atom<=> atom<=>
#:dynamic-ok? dynamic-ok?)]
#:? [rwtree-out (and rwtree-in=out? rwtree)]
#:? backward-rewrite?
#:? age:cost
#:? cost-type
#:? [disp-proof? #false]
#:? [L-resolvent-pruning? l-res-pruning?]
#:? [negative-literal-selection? neg-lit-select?]))
#:call-wrapped call
(define res (call))
(unless l-res-pruning?
(check-equal? (dict-ref res 'L-resolvent-pruning) 0))
(unless dynamic-ok?
(check-equal? (dict-ref res 'binary-rules-dynamic) 0))
(unless rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 0)
(check-true (= (dict-ref res 'binary-rewrites) 0)))
res)
;; Some refutation tests
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [] )))
'status)
'refuted)
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [p] )))
'status)
'saturated)
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [p]
[(not p)])))
'status)
'refuted)
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [p]
[(not q)])))
'status)
'saturated)
;; WARNING (TODO): If L-resolvents-pruning applied to input clauses too,
;; it would discard the 2nd clause immediately and would saturate!
(replay-on-failure
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [(p z)]
[(not (p X)) (p (s X))]
[(not q)])))
'status)
'steps))
;; Russell's 'paradox', requires factoring:
(check-equal?
(dict-ref (saturation (Vars+clausify-list
'( [(s X X) (s b X)]
[(not (s X X)) (not (s b X))])))
'status)
'refuted)
;; Second version
(check-equal?
(dict-ref (saturation (Vars+clausify-list
'( [(s X b) (s b X)]
[(not (s X b)) (not (s b X))])))
'status)
'refuted)
; TPTP 100k idx = 348
(check-equal?
(dict-ref (saturation (Vars+clausify-list
'( [(big_f T0_0 T0_1) (big_g T0_0 T0_2)]
[(big_f T1_0 T1_1) (not (big_g T1_0 T1_0))]
[(big_g T2_0 T2_1) (not (big_f T2_0 T2_2))]
[(not (big_f T3_0 T3_1)) (not (big_g T3_0 (esk1_1 T3_0)))])))
'status)
'refuted)
; TPTP 100k idx = 784
(check-equal?
(dict-ref (saturation (Vars+clausify-list '( [p1 p2]
[p1 (not p2)]
[p2 (not p1)]
[(not p1) (not p2)])))
'status)
'refuted)
; TPTP 100k idx = 117
(check-equal?
(dict-ref (saturation
(Vars+clausify-list
'( [(big_f T0_0 T0_1 (esk3_2 T0_0 T0_1))]
[(big_f esk1_0 esk2_0 esk2_0) (not (big_f esk1_0 esk1_0 esk2_0))]
[(big_f esk1_0 esk1_0 esk2_0)
(big_f esk1_0 esk2_0 esk2_0)
(not (big_f esk2_0 esk2_0 T2_0))]
[(big_f esk1_0 esk1_0 esk2_0)
(big_f esk2_0 T3_0 T3_1)
(not (big_f esk1_0 esk2_0 esk2_0))]
[(not (big_f esk1_0 esk1_0 esk2_0))
(not (big_f esk1_0 esk2_0 esk2_0))
(not (big_f esk2_0 esk2_0 T4_0))]
[(big_f esk1_0 esk2_0 esk2_0)
(not (big_f T5_0 T5_1 (esk3_2 T5_1 T5_0)))
(not (big_f esk1_0 esk1_0 esk2_0))]
[(big_f esk1_0 esk1_0 esk2_0)
(not (big_f T6_0 (esk3_2 T6_0 T6_1) (esk3_2 T6_0 T6_1)))
(not (big_f esk1_0 esk2_0 esk2_0))] )))
'status)
'refuted)
(check-equal?
(dict-ref
(saturation
(Vars+clausify-list
'([(p X) (not (p (p X)))]
[(not (p a))]
[(not (q a))]
[(q X) (not (q (q (q X))))]
[(q (q (q (q (q (q (q (q (q (q (q (q (q (q (q a)))))))))))))))])))
'status)
'refuted)
;; This problem shows there may be some loops with implication-removal and factoring!
(check-equal?
(dict-ref
(saturation
(Vars+clausify-list
'([(not (p X Y)) (p X Z) (p Z Y)]
[(p x x)]
[(not (q a a a a b b b b c c c c))]
[(q A A A A B B B B C C C C) (not (q (q A A A A) (q B B B B) (q C C C C)))]
[(q (q a a a a) (q b b b b) (q c c c c))])))
'status)
'refuted)
;; Binary rewrite
;; This example shows that *not* backward rewriting rules can be a problem:
;; Around step 19, there should be immediate resolution to '() with an active clause.
;; But because [(not (p a A))] has not been rewritten to [(notp a A)],
;; it cannot unify to '() immediately, and must wait for a *resolution* between
;; the rule and the clause to pop up from the queue.
(replay-on-failure
(define res
(saturation
(map clausify
'(((notp A B) (p A B)) ; axiom, binary clause
((not (notp A B)) (not (p A B))) ; axiom, converse binary clause
((p a A) (q b B) (r c C) (s d D)) ; these two clauses should resolve to '() immediately
((not (p A a))) ; Note that 'a A' is to prevent unit-clause rewrites
((not (q B b)))
((not (r C c)))
((not (s D d)))
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check > (dict-ref res 'unit-rules) 0)
(check-equal? (dict-ref res 'binary-rules) 2)
(check > (dict-ref res 'binary-rewrites) 0)))
;; 'Asymmetric' rules
(replay-on-failure
(define res
(saturation
(map clausify
'([(not (p A A)) (q A)] ; Not a rule in itself (too general), but enables the next ones
[(p a a) (not (q a))] ; rule (p a a) <-> (q a)
[(p b b) (not (q b))] ; rule (p b b) <-> (q b)
[(p a a) (p b b) (p c c)]
[(not (q a)) (remove-me x Y)]
[(not (q b)) (remove-me x Y)]
[(not (p c c)) (remove-me x Y)]
[(not (remove-me X y))] ; defeats urw
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 4)
(check-true (> (dict-ref res 'binary-rewrites) 0))))
;; Same but with rules loaded from a file
;; TODO
#;
(replay-on-failure
(define res
(saturation
(map clausify
'([(not (p A A)) (q A)] ; Not a rule in itself (too general), but enables the next ones
[(p a a) (not (q a))] ; rule (p a a) <-> (q a)
[(p b b) (not (q b))] ; rule (p b b) <-> (q b)
[(p a a) (p b b) (p c c)]
[(not (q a)) (remove-me x Y)]
[(not (q b)) (remove-me x Y)]
[(not (p c c)) (remove-me x Y)]
[(not (remove-me X y))] ; defeats urw
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 4)
(check-true (> (dict-ref res 'binary-rewrites) 0))))
;; Greedy selection of binary rewrites can lead to failure
(replay-on-failure
(define res
(saturation
(map clausify
'(; equivalences
[(not (q A B C D)) (p A B C)] ; (q A B C D) <=> (p A B C)
[(q A B C D) (not (p A B C))]
[(not (p A b C)) (t a)] ; (p A b C) <=> (t a)
[(p A b C) (not (t a))]
[(not (q A B c D)) (s b c)] ; (q A b c D) <=> (s b c)
[(q A B c D) (not (s b c))]
; inputs
; may be rewritten to (s b c)
[(q a b c d) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
[(not (t a)) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
;
[(not (remove-me X y))] ; defeats urw
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 6)
(check-true (> (dict-ref res 'binary-rewrites) 0))))
;; Overlapping rewrites can lead to failures (but not without rewrites)
(replay-on-failure
(define res
(saturation
(map clausify
'(; equivalences
[(not (q A B C D)) (p A B C)] ; (q A B C D) <=> (p A B C)
[(q A B C D) (not(p A B C))]
[(not (p A b C)) (t a)] ; (p A b C) <=> (t a)
[(p A b C) (not (t a))]
[(not (q A b c D)) (s b c)] ; (q A b c D) <=> (s b c)
[(q A b c D) (not (s b c))]
; inputs
[(s b c) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
[(not (t a)) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
;
[(not (remove-me X y))] ; defeats urw
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 6)
(check-true (> (dict-ref res 'binary-rewrites) 0))))
;; Greedy selection of overlapping rewrites can lead to failures
(replay-on-failure
(define res
(saturation
(map clausify
'(; equivalences
[(not (q A B C D)) (p A B C)] ; (q A B C D) <=> (p A B C)
[(q A B C D) (not(p A B C))]
[(not (p A b C)) (r A C)] ; (p A b C) <=> (r A C)
[(p A b C) (not (r A C))]
[(not (r A c)) (t a)] ; (r A c) <=> (t a)
[(r A c) (not (t a))]
[(not (q A b c D)) (s b c)] ; (q A b c D) <=> (s b c)
[(q A b c D) (not (s b c))]
; inputs
[(s b c) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
[(not (t a)) (remove-me x Y) (remove-me y Y) (remove-me z Y)]
;
[(not (remove-me X y))] ; defeats urw
))))
(check-equal? (dict-ref res 'status) 'refuted)
(when rwtree-in=out?
(check-equal? (dict-ref res 'binary-rules) 8)
(check-true (> (dict-ref res 'binary-rewrites) 0)))))