Source code for nnf.dimacs

"""A parser and serializer for the DIMACS
`CNF and SAT formats <https://www.cs.ubc.ca/~hoos/SATLIB/benchm.html>`_."""

import collections
import io
import typing as t
import warnings

from nnf import NNF, Var, And, Or, true, false
from nnf.util import Name

__all__ = (
    "dump",
    "load",
    "dumps",
    "loads",
    "DimacsError",
    "EncodeError",
    "DecodeError",
)


[docs]class DimacsError(Exception): pass
[docs]class EncodeError(DimacsError): pass
[docs]class DecodeError(DimacsError): pass
[docs]def dump( obj: NNF, fp: t.TextIO, *, num_variables: t.Optional[int] = None, var_labels: t.Optional[t.Dict[Name, int]] = None, comment_header: t.Optional[str] = None, mode: str = 'sat' ) -> None: """Dump a sentence into an open file in a DIMACS format. Variable names have to be integers. If the variables in the sentence you want to dump are not integers, you can pass a ``var_labels`` dictionary to map names to integers. :param obj: The sentence to dump. :param fp: The open file. :param num_variables: Override the number of variables, in case there are variables that don't appear in the sentence. :param var_labels: A dictionary mapping variable names to integers, to rename non-integer variables. :param comment_header: A comment to include at the top of the file. May include newlines. :param mode: Either ``'sat'`` to dump in the general SAT format, or ``'cnf'`` to dump in the specialized CNF format. """ if num_variables is None: if var_labels is None: names = t.cast("t.FrozenSet[int]", obj.vars()) for name in names: if not isinstance(name, int) or name <= 0: raise EncodeError( "{!r} is not an integer > 0. Try supplying a " "var_labels dictionary.".format(name) ) num_vars = max(names, default=0) # type: int else: num_vars = max(var_labels.values(), default=0) else: num_vars = num_variables if mode == 'sat': _dump_sat(obj, fp, num_variables=num_vars, var_labels=var_labels, comment_header=comment_header) elif mode == 'cnf': _dump_cnf(obj, fp, num_variables=num_vars, var_labels=var_labels, comment_header=comment_header)
def _write_comments(comment_header: str, fp: t.TextIO) -> None: for line in comment_header.split('\n'): fp.write('c ') fp.write(line) fp.write('\n') def _format_var( node: Var, num_variables: int, var_labels: t.Optional[t.Dict[Name, int]] = None ) -> str: if var_labels is not None: name = var_labels[node.name] else: name = node.name # type: ignore if not isinstance(name, int) or name <= 0: raise EncodeError("{!r} is not an integer > 0".format(name)) if name > num_variables: raise EncodeError("{!r} is more than num_variables".format(name)) if not node.true: return "-{}".format(name) return str(name) def _dump_sat( obj: NNF, fp: t.TextIO, *, num_variables: int, var_labels: t.Optional[t.Dict[Name, int]] = None, comment_header: t.Optional[str] = None ) -> None: if comment_header is not None: _write_comments(comment_header, fp) fp.write("p sat {}\n".format(num_variables)) def serialize(node: NNF) -> None: if isinstance(node, Var): fp.write(_format_var(node, num_variables, var_labels)) elif isinstance(node, (Or, And)): fp.write('+(' if isinstance(node, Or) else '*(') first = True for child in node.children: if first: first = False else: fp.write(' ') serialize(child) fp.write(')') else: raise EncodeError("Can't serialize type {}".format(type(node))) fp.write('(') serialize(obj) fp.write(')') def _dump_cnf( obj: NNF, fp: t.TextIO, *, num_variables: int, var_labels: t.Optional[t.Dict[Name, int]] = None, comment_header: t.Optional[str] = None ) -> None: if not isinstance(obj, And): raise EncodeError("CNF sentences must be conjunctions") if comment_header is not None: _write_comments(comment_header, fp) fp.write("p cnf {} {}\n".format(num_variables, len(obj.children))) for clause in obj.children: if not isinstance(clause, Or): raise EncodeError( "CNF sentences must be conjunctions of disjunctions" ) first = True for child in clause.children: if not isinstance(child, Var): raise EncodeError( "CNF sentences must be conjunctions of " "disjunctions of variables" ) if not first: fp.write(' ') else: first = False fp.write(_format_var(child, num_variables, var_labels)) fp.write(' 0\n')
[docs]def dumps( obj: NNF, *, num_variables: t.Optional[int] = None, var_labels: t.Optional[t.Dict[Name, int]] = None, comment_header: t.Optional[str] = None, mode: str = 'sat' ) -> str: """Like :func:`dump`, but to a string instead of to a file.""" buffer = io.StringIO() dump(obj, buffer, num_variables=num_variables, var_labels=var_labels, comment_header=comment_header, mode=mode) return buffer.getvalue()
[docs]def load(fp: t.TextIO) -> t.Union[NNF, And[Or[Var]]]: """Load a sentence from an open file. The format is automatically detected. """ for line in fp: if line.startswith('c'): continue if line.startswith('p '): problem = line.split() if len(line) < 2: raise DecodeError("Malformed problem line") fmt = problem[1] if 'sat' in fmt or 'SAT' in fmt: # problem[2] contains the number of variables # but that's currently not explicitly represented return _load_sat(fp) elif 'cnf' in fmt or 'CNF' in fmt: # problem[2] has the number of variables # problem[3] has the number of clauses return _load_cnf(fp) else: raise DecodeError("Unknown format '{}'".format(fmt)) elif line.startswith('nnf '): # Might be a DSHARP output file from nnf import dsharp fp.seek(0) return dsharp.load(fp) else: raise DecodeError( "Couldn't find a problem line before an unknown kind of line" ) else: raise DecodeError( "Couldn't find a problem line before the end of the file" )
[docs]def loads(s: str) -> t.Union[NNF, And[Or[Var]]]: """Like :func:`load`, but from a string instead of from a file.""" return load(io.StringIO(s))
def _load_sat(fp: t.TextIO) -> NNF: tokens = collections.deque() # type: t.Deque[str] for line in fp: if line.startswith('c'): continue tokens.extend( line.replace('(', '( ') .replace(')', ' ) ') .replace('+(', ' +(') .replace('*(', ' *(') .replace('-', ' - ') .split() ) result = _parse_sat(tokens) if tokens: warnings.warn("Found extra tokens past the end of the sentence") return result def _parse_sat(tokens: 't.Deque[str]') -> NNF: cur = tokens.popleft() if cur == '(': content = _parse_sat(tokens) close = tokens.popleft() if close != ')': raise DecodeError( "Expected closing paren, found {!r}".format(close) ) return content elif cur == '-': content = _parse_sat(tokens) if not isinstance(content, Var): raise DecodeError( "Only variables can be negated, not {!r}".format(content) ) return ~content elif cur == '*(': children = [] while tokens[0] != ')': children.append(_parse_sat(tokens)) tokens.popleft() if children: return And(children) else: return true elif cur == '+(': children = [] while tokens[0] != ')': children.append(_parse_sat(tokens)) tokens.popleft() if children: return Or(children) else: return false else: return Var(_parse_int(cur)) def _load_cnf(fp: t.TextIO) -> And[Or[Var]]: tokens = [] # type: t.List[str] for line in fp: if line.startswith('c'): continue tokens.extend( line.replace('-', ' -') .split() ) return _parse_cnf(tokens) def _parse_cnf(tokens: t.Iterable[str]) -> And[Or[Var]]: clauses = set() # type: t.Set[Or[Var]] clause = set() # type: t.Set[Var] for token in tokens: if token == '0': clauses.add(Or(clause)) clause = set() elif token == '%': # Some example files end with: # 0 # % # 0 # I don't know why. break elif token.startswith('-'): clause.add(Var(_parse_int(token[1:]), False)) else: clause.add(Var(_parse_int(token))) if clause: # A file may or may not end with a 0 # Adding an empty clause is not desirable clauses.add(Or(clause)) sentence = And(clauses) NNF._is_CNF_loose.set(sentence, True) return sentence def _parse_int(token: str) -> int: """Parse an integer, or raise an appropriate exception. Arguably a little too accepting, e.g. _parse_int("рей") == 3 (that's U+0969 DEVANAGARI DIGIT THREE) """ try: return int(token) except ValueError: raise DecodeError( "Found unexpected token {!r}".format(token) ) from None