Source code for nltk.sem.logic

# Natural Language Toolkit: Logic
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2015 NLTK Project
# URL: <http://nltk.org>
# For license information, see LICENSE.TXT

"""
A version of first order predicate logic, built on
top of the typed lambda calculus.
"""
from __future__ import print_function, unicode_literals

import re
import operator
from collections import defaultdict
from functools import reduce

from nltk.util import Trie
from nltk.internals import Counter
from nltk.compat import (total_ordering, string_types,
                         python_2_unicode_compatible)

APP = 'APP'

_counter = Counter()

class Tokens(object):
    LAMBDA = '\\';     LAMBDA_LIST = ['\\']

    #Quantifiers
    EXISTS = 'exists'; EXISTS_LIST = ['some', 'exists', 'exist']
    ALL = 'all';       ALL_LIST = ['all', 'forall']

    #Punctuation
    DOT = '.'
    OPEN = '('
    CLOSE = ')'
    COMMA = ','

    #Operations
    NOT = '-';         NOT_LIST = ['not', '-', '!']
    AND = '&';         AND_LIST = ['and', '&', '^']
    OR = '|';          OR_LIST = ['or', '|']
    IMP = '->';        IMP_LIST = ['implies', '->', '=>']
    IFF = '<->';       IFF_LIST = ['iff', '<->', '<=>']
    EQ = '=';          EQ_LIST = ['=', '==']
    NEQ = '!=';        NEQ_LIST = ['!=']

    #Collections of tokens
    BINOPS = AND_LIST + OR_LIST + IMP_LIST + IFF_LIST
    QUANTS = EXISTS_LIST + ALL_LIST
    PUNCT = [DOT, OPEN, CLOSE, COMMA]

    TOKENS = BINOPS + EQ_LIST + NEQ_LIST + QUANTS + LAMBDA_LIST + PUNCT + NOT_LIST

    #Special
    SYMBOLS = [x for x in TOKENS if re.match(r'^[-\\.(),!&^|>=<]*$', x)]


[docs]def boolean_ops(): """ Boolean operators """ names = ["negation", "conjunction", "disjunction", "implication", "equivalence"] for pair in zip(names, [Tokens.NOT, Tokens.AND, Tokens.OR, Tokens.IMP, Tokens.IFF]): print("%-15s\t%s" % pair)
[docs]def equality_preds(): """ Equality predicates """ names = ["equality", "inequality"] for pair in zip(names, [Tokens.EQ, Tokens.NEQ]): print("%-15s\t%s" % pair)
[docs]def binding_ops(): """ Binding operators """ names = ["existential", "universal", "lambda"] for pair in zip(names, [Tokens.EXISTS, Tokens.ALL, Tokens.LAMBDA]): print("%-15s\t%s" % pair)
@python_2_unicode_compatible class LogicParser(object): """A lambda calculus expression parser.""" def __init__(self, type_check=False): """ :param type_check: bool should type checking be performed? to their types. """ assert isinstance(type_check, bool) self._currentIndex = 0 self._buffer = [] self.type_check = type_check """A list of tuples of quote characters. The 4-tuple is comprised of the start character, the end character, the escape character, and a boolean indicating whether the quotes should be included in the result. Quotes are used to signify that a token should be treated as atomic, ignoring any special characters within the token. The escape character allows the quote end character to be used within the quote. If True, the boolean indicates that the final token should contain the quote and escape characters. This method exists to be overridden""" self.quote_chars = [] self.operator_precedence = dict( [(x,1) for x in Tokens.LAMBDA_LIST] + \ [(x,2) for x in Tokens.NOT_LIST] + \ [(APP,3)] + \ [(x,4) for x in Tokens.EQ_LIST+Tokens.NEQ_LIST] + \ [(x,5) for x in Tokens.QUANTS] + \ [(x,6) for x in Tokens.AND_LIST] + \ [(x,7) for x in Tokens.OR_LIST] + \ [(x,8) for x in Tokens.IMP_LIST] + \ [(x,9) for x in Tokens.IFF_LIST] + \ [(None,10)]) self.right_associated_operations = [APP] def parse(self, data, signature=None): """ Parse the expression. :param data: str for the input to be parsed :param signature: ``dict<str, str>`` that maps variable names to type strings :returns: a parsed Expression """ data = data.rstrip() self._currentIndex = 0 self._buffer, mapping = self.process(data) try: result = self.process_next_expression(None) if self.inRange(0): raise UnexpectedTokenException(self._currentIndex+1, self.token(0)) except LogicalExpressionException as e: msg = '%s\n%s\n%s^' % (e, data, ' '*mapping[e.index-1]) raise LogicalExpressionException(None, msg) if self.type_check: result.typecheck(signature) return result def process(self, data): """Split the data into tokens""" out = [] mapping = {} tokenTrie = Trie(self.get_all_symbols()) token = '' data_idx = 0 token_start_idx = data_idx while data_idx < len(data): cur_data_idx = data_idx quoted_token, data_idx = self.process_quoted_token(data_idx, data) if quoted_token: if not token: token_start_idx = cur_data_idx token += quoted_token continue st = tokenTrie c = data[data_idx] symbol = '' while c in st: symbol += c st = st[c] if len(data)-data_idx > len(symbol): c = data[data_idx+len(symbol)] else: break if Trie.LEAF in st: #token is a complete symbol if token: mapping[len(out)] = token_start_idx out.append(token) token = '' mapping[len(out)] = data_idx out.append(symbol) data_idx += len(symbol) else: if data[data_idx] in ' \t\n': #any whitespace if token: mapping[len(out)] = token_start_idx out.append(token) token = '' else: if not token: token_start_idx = data_idx token += data[data_idx] data_idx += 1 if token: mapping[len(out)] = token_start_idx out.append(token) mapping[len(out)] = len(data) mapping[len(out)+1] = len(data)+1 return out, mapping def process_quoted_token(self, data_idx, data): token = '' c = data[data_idx] i = data_idx for start, end, escape, incl_quotes in self.quote_chars: if c == start: if incl_quotes: token += c i += 1 while data[i] != end: if data[i] == escape: if incl_quotes: token += data[i] i += 1 if len(data) == i: #if there are no more chars raise LogicalExpressionException(None, "End of input reached. " "Escape character [%s] found at end." % escape) token += data[i] else: token += data[i] i += 1 if len(data) == i: raise LogicalExpressionException(None, "End of input reached. " "Expected: [%s]" % end) if incl_quotes: token += data[i] i += 1 if not token: raise LogicalExpressionException(None, 'Empty quoted token found') break return token, i def get_all_symbols(self): """This method exists to be overridden""" return Tokens.SYMBOLS def inRange(self, location): """Return TRUE if the given location is within the buffer""" return self._currentIndex+location < len(self._buffer) def token(self, location=None): """Get the next waiting token. If a location is given, then return the token at currentIndex+location without advancing currentIndex; setting it gives lookahead/lookback capability.""" try: if location is None: tok = self._buffer[self._currentIndex] self._currentIndex += 1 else: tok = self._buffer[self._currentIndex+location] return tok except IndexError: raise ExpectedMoreTokensException(self._currentIndex+1) def isvariable(self, tok): return tok not in Tokens.TOKENS def process_next_expression(self, context): """Parse the next complete expression from the stream and return it.""" try: tok = self.token() except ExpectedMoreTokensException: raise ExpectedMoreTokensException(self._currentIndex+1, message='Expression expected.') accum = self.handle(tok, context) if not accum: raise UnexpectedTokenException(self._currentIndex, tok, message='Expression expected.') return self.attempt_adjuncts(accum, context) def handle(self, tok, context): """This method is intended to be overridden for logics that use different operators or expressions""" if self.isvariable(tok): return self.handle_variable(tok, context) elif tok in Tokens.NOT_LIST: return self.handle_negation(tok, context) elif tok in Tokens.LAMBDA_LIST: return self.handle_lambda(tok, context) elif tok in Tokens.QUANTS: return self.handle_quant(tok, context) elif tok == Tokens.OPEN: return self.handle_open(tok, context) def attempt_adjuncts(self, expression, context): cur_idx = None while cur_idx != self._currentIndex: #while adjuncts are added cur_idx = self._currentIndex expression = self.attempt_EqualityExpression(expression, context) expression = self.attempt_ApplicationExpression(expression, context) expression = self.attempt_BooleanExpression(expression, context) return expression def handle_negation(self, tok, context): return self.make_NegatedExpression(self.process_next_expression(Tokens.NOT)) def make_NegatedExpression(self, expression): return NegatedExpression(expression) def handle_variable(self, tok, context): #It's either: 1) a predicate expression: sees(x,y) # 2) an application expression: P(x) # 3) a solo variable: john OR x accum = self.make_VariableExpression(tok) if self.inRange(0) and self.token(0) == Tokens.OPEN: #The predicate has arguments if not isinstance(accum, FunctionVariableExpression) and \ not isinstance(accum, ConstantExpression): raise LogicalExpressionException(self._currentIndex, "'%s' is an illegal predicate name. " "Individual variables may not be used as " "predicates." % tok) self.token() #swallow the Open Paren #curry the arguments accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP)) while self.inRange(0) and self.token(0) == Tokens.COMMA: self.token() #swallow the comma accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP)) self.assertNextToken(Tokens.CLOSE) return accum def get_next_token_variable(self, description): try: tok = self.token() except ExpectedMoreTokensException as e: raise ExpectedMoreTokensException(e.index, 'Variable expected.') if isinstance(self.make_VariableExpression(tok), ConstantExpression): raise LogicalExpressionException(self._currentIndex, "'%s' is an illegal variable name. " "Constants may not be %s." % (tok, description)) return Variable(tok) def handle_lambda(self, tok, context): # Expression is a lambda expression if not self.inRange(0): raise ExpectedMoreTokensException(self._currentIndex+2, message="Variable and Expression expected following lambda operator.") vars = [self.get_next_token_variable('abstracted')] while True: if not self.inRange(0) or (self.token(0) == Tokens.DOT and not self.inRange(1)): raise ExpectedMoreTokensException(self._currentIndex+2, message="Expression expected.") if not self.isvariable(self.token(0)): break # Support expressions like: \x y.M == \x.\y.M vars.append(self.get_next_token_variable('abstracted')) if self.inRange(0) and self.token(0) == Tokens.DOT: self.token() #swallow the dot accum = self.process_next_expression(tok) while vars: accum = self.make_LambdaExpression(vars.pop(), accum) return accum def handle_quant(self, tok, context): # Expression is a quantified expression: some x.M factory = self.get_QuantifiedExpression_factory(tok) if not self.inRange(0): raise ExpectedMoreTokensException(self._currentIndex+2, message="Variable and Expression expected following quantifier '%s'." % tok) vars = [self.get_next_token_variable('quantified')] while True: if not self.inRange(0) or (self.token(0) == Tokens.DOT and not self.inRange(1)): raise ExpectedMoreTokensException(self._currentIndex+2, message="Expression expected.") if not self.isvariable(self.token(0)): break # Support expressions like: some x y.M == some x.some y.M vars.append(self.get_next_token_variable('quantified')) if self.inRange(0) and self.token(0) == Tokens.DOT: self.token() #swallow the dot accum = self.process_next_expression(tok) while vars: accum = self.make_QuanifiedExpression(factory, vars.pop(), accum) return accum def get_QuantifiedExpression_factory(self, tok): """This method serves as a hook for other logic parsers that have different quantifiers""" if tok in Tokens.EXISTS_LIST: return ExistsExpression elif tok in Tokens.ALL_LIST: return AllExpression else: self.assertToken(tok, Tokens.QUANTS) def make_QuanifiedExpression(self, factory, variable, term): return factory(variable, term) def handle_open(self, tok, context): #Expression is in parens accum = self.process_next_expression(None) self.assertNextToken(Tokens.CLOSE) return accum def attempt_EqualityExpression(self, expression, context): """Attempt to make an equality expression. If the next token is an equality operator, then an EqualityExpression will be returned. Otherwise, the parameter will be returned.""" if self.inRange(0): tok = self.token(0) if tok in Tokens.EQ_LIST + Tokens.NEQ_LIST and self.has_priority(tok, context): self.token() #swallow the "=" or "!=" expression = self.make_EqualityExpression(expression, self.process_next_expression(tok)) if tok in Tokens.NEQ_LIST: expression = self.make_NegatedExpression(expression) return expression def make_EqualityExpression(self, first, second): """This method serves as a hook for other logic parsers that have different equality expression classes""" return EqualityExpression(first, second) def attempt_BooleanExpression(self, expression, context): """Attempt to make a boolean expression. If the next token is a boolean operator, then a BooleanExpression will be returned. Otherwise, the parameter will be returned.""" while self.inRange(0): tok = self.token(0) factory = self.get_BooleanExpression_factory(tok) if factory and self.has_priority(tok, context): self.token() #swallow the operator expression = self.make_BooleanExpression(factory, expression, self.process_next_expression(tok)) else: break return expression def get_BooleanExpression_factory(self, tok): """This method serves as a hook for other logic parsers that have different boolean operators""" if tok in Tokens.AND_LIST: return AndExpression elif tok in Tokens.OR_LIST: return OrExpression elif tok in Tokens.IMP_LIST: return ImpExpression elif tok in Tokens.IFF_LIST: return IffExpression else: return None def make_BooleanExpression(self, factory, first, second): return factory(first, second) def attempt_ApplicationExpression(self, expression, context): """Attempt to make an application expression. The next tokens are a list of arguments in parens, then the argument expression is a function being applied to the arguments. Otherwise, return the argument expression.""" if self.has_priority(APP, context): if self.inRange(0) and self.token(0) == Tokens.OPEN: if not isinstance(expression, LambdaExpression) and \ not isinstance(expression, ApplicationExpression) and \ not isinstance(expression, FunctionVariableExpression) and \ not isinstance(expression, ConstantExpression): raise LogicalExpressionException(self._currentIndex, ("The function '%s" % expression) + "' is not a Lambda Expression, an " "Application Expression, or a " "functional predicate, so it may " "not take arguments.") self.token() #swallow then open paren #curry the arguments accum = self.make_ApplicationExpression(expression, self.process_next_expression(APP)) while self.inRange(0) and self.token(0) == Tokens.COMMA: self.token() #swallow the comma accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP)) self.assertNextToken(Tokens.CLOSE) return accum return expression def make_ApplicationExpression(self, function, argument): return ApplicationExpression(function, argument) def make_VariableExpression(self, name): return VariableExpression(Variable(name)) def make_LambdaExpression(self, variable, term): return LambdaExpression(variable, term) def has_priority(self, operation, context): return self.operator_precedence[operation] < self.operator_precedence[context] or \ (operation in self.right_associated_operations and \ self.operator_precedence[operation] == self.operator_precedence[context]) def assertNextToken(self, expected): try: tok = self.token() except ExpectedMoreTokensException as e: raise ExpectedMoreTokensException(e.index, message="Expected token '%s'." % expected) if isinstance(expected, list): if tok not in expected: raise UnexpectedTokenException(self._currentIndex, tok, expected) else: if tok != expected: raise UnexpectedTokenException(self._currentIndex, tok, expected) def assertToken(self, tok, expected): if isinstance(expected, list): if tok not in expected: raise UnexpectedTokenException(self._currentIndex, tok, expected) else: if tok != expected: raise UnexpectedTokenException(self._currentIndex, tok, expected) def __repr__(self): if self.inRange(0): msg = 'Next token: ' + self.token(0) else: msg = 'No more tokens' return '<' + self.__class__.__name__ + ': ' + msg + '>'
[docs]def read_logic(s, logic_parser=None, encoding=None): """ Convert a file of First Order Formulas into a list of {Expression}s. :param s: the contents of the file :type s: str :param logic_parser: The parser to be used to parse the logical expression :type logic_parser: LogicParser :param encoding: the encoding of the input string, if it is binary :type encoding: str :return: a list of parsed formulas. :rtype: list(Expression) """ if encoding is not None: s = s.decode(encoding) if logic_parser is None: logic_parser = LogicParser() statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() if line.startswith('#') or line=='': continue try: statements.append(logic_parser.parse(line)) except LogicalExpressionException: raise ValueError('Unable to parse line %s: %s' % (linenum, line)) return statements
@total_ordering @python_2_unicode_compatible
[docs]class Variable(object):
[docs] def __init__(self, name): """ :param name: the name of the variable """ assert isinstance(name, string_types), "%s is not a string" % name self.name = name
def __eq__(self, other): return isinstance(other, Variable) and self.name == other.name def __ne__(self, other): return not self == other def __lt__(self, other): if not isinstance(other, Variable): raise TypeError return self.name < other.name
[docs] def substitute_bindings(self, bindings): return bindings.get(self, self)
def __hash__(self): return hash(self.name) def __str__(self): return self.name def __repr__(self): return "Variable('%s')" % self.name
def unique_variable(pattern=None, ignore=None): """ Return a new, unique variable. :param pattern: ``Variable`` that is being replaced. The new variable must be the same type. :param term: a set of ``Variable`` objects that should not be returned from this function. :rtype: Variable """ if pattern is not None: if is_indvar(pattern.name): prefix = 'z' elif is_funcvar(pattern.name): prefix = 'F' elif is_eventvar(pattern.name): prefix = 'e0' else: assert False, "Cannot generate a unique constant" else: prefix = 'z' v = Variable("%s%s" % (prefix, _counter.get())) while ignore is not None and v in ignore: v = Variable("%s%s" % (prefix, _counter.get())) return v def skolem_function(univ_scope=None): """ Return a skolem function over the variables in univ_scope param univ_scope """ skolem = VariableExpression(Variable('F%s' % _counter.get())) if univ_scope: for v in list(univ_scope): skolem = skolem(VariableExpression(v)) return skolem @python_2_unicode_compatible class Type(object): def __repr__(self): return "%s" % self def __hash__(self): return hash("%s" % self) @classmethod def fromstring(cls, s): return read_type(s) @python_2_unicode_compatible class ComplexType(Type): def __init__(self, first, second): assert(isinstance(first, Type)), "%s is not a Type" % first assert(isinstance(second, Type)), "%s is not a Type" % second self.first = first self.second = second def __eq__(self, other): return isinstance(other, ComplexType) and \ self.first == other.first and \ self.second == other.second def __ne__(self, other): return not self == other __hash__ = Type.__hash__ def matches(self, other): if isinstance(other, ComplexType): return self.first.matches(other.first) and \ self.second.matches(other.second) else: return self == ANY_TYPE def resolve(self, other): if other == ANY_TYPE: return self elif isinstance(other, ComplexType): f = self.first.resolve(other.first) s = self.second.resolve(other.second) if f and s: return ComplexType(f,s) else: return None elif self == ANY_TYPE: return other else: return None def __str__(self): if self == ANY_TYPE: return "%s" % ANY_TYPE else: return '<%s,%s>' % (self.first, self.second) def str(self): if self == ANY_TYPE: return ANY_TYPE.str() else: return '(%s -> %s)' % (self.first.str(), self.second.str()) class BasicType(Type): def __eq__(self, other): return isinstance(other, BasicType) and ("%s" % self) == ("%s" % other) def __ne__(self, other): return not self == other __hash__ = Type.__hash__ def matches(self, other): return other == ANY_TYPE or self == other def resolve(self, other): if self.matches(other): return self else: return None @python_2_unicode_compatible class EntityType(BasicType): def __str__(self): return 'e' def str(self): return 'IND' @python_2_unicode_compatible class TruthValueType(BasicType): def __str__(self): return 't' def str(self): return 'BOOL' @python_2_unicode_compatible class EventType(BasicType): def __str__(self): return 'v' def str(self): return 'EVENT' @python_2_unicode_compatible class AnyType(BasicType, ComplexType): def __init__(self): pass @property def first(self): return self @property def second(self): return self def __eq__(self, other): return isinstance(other, AnyType) or other.__eq__(self) def __ne__(self, other): return not self == other __hash__ = Type.__hash__ def matches(self, other): return True def resolve(self, other): return other def __str__(self): return '?' def str(self): return 'ANY' TRUTH_TYPE = TruthValueType() ENTITY_TYPE = EntityType() EVENT_TYPE = EventType() ANY_TYPE = AnyType() def read_type(type_string): assert isinstance(type_string, string_types) type_string = type_string.replace(' ', '') #remove spaces if type_string[0] == '<': assert type_string[-1] == '>' paren_count = 0 for i,char in enumerate(type_string): if char == '<': paren_count += 1 elif char == '>': paren_count -= 1 assert paren_count > 0 elif char == ',': if paren_count == 1: break return ComplexType(read_type(type_string[1 :i ]), read_type(type_string[i+1:-1])) elif type_string[0] == "%s" % ENTITY_TYPE: return ENTITY_TYPE elif type_string[0] == "%s" % TRUTH_TYPE: return TRUTH_TYPE elif type_string[0] == "%s" % ANY_TYPE: return ANY_TYPE else: raise LogicalExpressionException("Unexpected character: '%s'." % type_string[0]) class TypeException(Exception): def __init__(self, msg): Exception.__init__(self, msg) class InconsistentTypeHierarchyException(TypeException): def __init__(self, variable, expression=None): if expression: msg = "The variable '%s' was found in multiple places with different"\ " types in '%s'." % (variable, expression) else: msg = "The variable '%s' was found in multiple places with different"\ " types." % (variable) Exception.__init__(self, msg) class TypeResolutionException(TypeException): def __init__(self, expression, other_type): Exception.__init__(self, "The type of '%s', '%s', cannot be " "resolved with type '%s'" % \ (expression, expression.type, other_type)) class IllegalTypeException(TypeException): def __init__(self, expression, other_type, allowed_type): Exception.__init__(self, "Cannot set type of %s '%s' to '%s'; " "must match type '%s'." % (expression.__class__.__name__, expression, other_type, allowed_type)) def typecheck(expressions, signature=None): """ Ensure correct typing across a collection of ``Expression`` objects. :param expressions: a collection of expressions :param signature: dict that maps variable names to types (or string representations of types) """ #typecheck and create master signature for expression in expressions: signature = expression.typecheck(signature) #apply master signature to all expressions for expression in expressions[:-1]: expression.typecheck(signature) return signature class SubstituteBindingsI(object): """ An interface for classes that can perform substitutions for variables. """ def substitute_bindings(self, bindings): """ :return: The object that is obtained by replacing each variable bound by ``bindings`` with its values. Aliases are already resolved. (maybe?) :rtype: (any) """ raise NotImplementedError() def variables(self): """ :return: A list of all variables in this object. """ raise NotImplementedError() @python_2_unicode_compatible
[docs]class Expression(SubstituteBindingsI): """This is the base abstract object for all logical expressions""" _logic_parser = LogicParser() _type_checking_logic_parser = LogicParser(type_check=True) @classmethod
[docs] def fromstring(cls, s, type_check=False, signature=None): if type_check: return cls._type_checking_logic_parser.parse(s, signature) else: return cls._logic_parser.parse(s, signature)
def __call__(self, other, *additional): accum = self.applyto(other) for a in additional: accum = accum(a) return accum
[docs] def applyto(self, other): assert isinstance(other, Expression), "%s is not an Expression" % other return ApplicationExpression(self, other