Source code for nnf.dimacs

"""A parser and serializer for the DIMACS
`CNF and SAT formats <https://www.cs.ubc.ca/~hoos/SATLIB/benchm.html>`_."""

import collections
import io
import typing as t
import warnings

from nnf import NNF, Var, And, Or, true, false
from nnf.util import Name

__all__ = (
    "dump",
    "load",
    "dumps",
    "loads",
    "DimacsError",
    "EncodeError",
    "DecodeError",
)


[docs]class DimacsError(Exception):
    pass


[docs]class EncodeError(DimacsError):
    pass


[docs]class DecodeError(DimacsError):
    pass


[docs]def dump(
        obj: NNF,
        fp: t.TextIO,
        *,
        num_variables: t.Optional[int] = None,
        var_labels: t.Optional[t.Dict[Name, int]] = None,
        comment_header: t.Optional[str] = None,
        mode: str = 'sat'
) -> None:
    """Dump a sentence into an open file in a DIMACS format.

    Variable names have to be integers. If the variables in the sentence you
    want to dump are not integers, you can pass a ``var_labels`` dictionary
    to map names to integers.

    :param obj: The sentence to dump.
    :param fp: The open file.
    :param num_variables: Override the number of variables, in case there
                          are variables that don't appear in the sentence.
    :param var_labels: A dictionary mapping variable names to integers,
                       to rename non-integer variables.
    :param comment_header: A comment to include at the top of the file. May
                           include newlines.
    :param mode: Either ``'sat'`` to dump in the general SAT format,
                 or ``'cnf'`` to dump in the specialized CNF format.
    """
    if num_variables is None:
        if var_labels is None:
            names = t.cast("t.FrozenSet[int]", obj.vars())
            for name in names:
                if not isinstance(name, int) or name <= 0:
                    raise EncodeError(
                        "{!r} is not an integer > 0. Try supplying a "
                        "var_labels dictionary.".format(name)
                    )
            num_vars = max(names, default=0)  # type: int
        else:
            num_vars = max(var_labels.values(), default=0)
    else:
        num_vars = num_variables

    if mode == 'sat':
        _dump_sat(obj, fp, num_variables=num_vars, var_labels=var_labels,
                  comment_header=comment_header)
    elif mode == 'cnf':
        _dump_cnf(obj, fp, num_variables=num_vars, var_labels=var_labels,
                  comment_header=comment_header)


def _write_comments(comment_header: str, fp: t.TextIO) -> None:
    for line in comment_header.split('\n'):
        fp.write('c ')
        fp.write(line)
        fp.write('\n')


def _format_var(
        node: Var,
        num_variables: int,
        var_labels: t.Optional[t.Dict[Name, int]] = None
) -> str:
    if var_labels is not None:
        name = var_labels[node.name]
    else:
        name = node.name  # type: ignore
    if not isinstance(name, int) or name <= 0:
        raise EncodeError("{!r} is not an integer > 0".format(name))
    if name > num_variables:
        raise EncodeError("{!r} is more than num_variables".format(name))
    if not node.true:
        return "-{}".format(name)
    return str(name)


def _dump_sat(
        obj: NNF,
        fp: t.TextIO,
        *,
        num_variables: int,
        var_labels: t.Optional[t.Dict[Name, int]] = None,
        comment_header: t.Optional[str] = None
) -> None:
    if comment_header is not None:
        _write_comments(comment_header, fp)

    fp.write("p sat {}\n".format(num_variables))

    def serialize(node: NNF) -> None:
        if isinstance(node, Var):
            fp.write(_format_var(node, num_variables, var_labels))
        elif isinstance(node, (Or, And)):
            fp.write('+(' if isinstance(node, Or) else '*(')
            first = True
            for child in node.children:
                if first:
                    first = False
                else:
                    fp.write(' ')
                serialize(child)
            fp.write(')')
        else:
            raise EncodeError("Can't serialize type {}".format(type(node)))

    fp.write('(')
    serialize(obj)
    fp.write(')')


def _dump_cnf(
        obj: NNF,
        fp: t.TextIO,
        *,
        num_variables: int,
        var_labels: t.Optional[t.Dict[Name, int]] = None,
        comment_header: t.Optional[str] = None
) -> None:
    if not isinstance(obj, And):
        raise EncodeError("CNF sentences must be conjunctions")

    if comment_header is not None:
        _write_comments(comment_header, fp)

    fp.write("p cnf {} {}\n".format(num_variables, len(obj.children)))

    for clause in obj.children:
        if not isinstance(clause, Or):
            raise EncodeError(
                "CNF sentences must be conjunctions of disjunctions"
            )
        first = True
        for child in clause.children:
            if not isinstance(child, Var):
                raise EncodeError(
                    "CNF sentences must be conjunctions of "
                    "disjunctions of variables"
                )
            if not first:
                fp.write(' ')
            else:
                first = False
            fp.write(_format_var(child, num_variables, var_labels))
        fp.write(' 0\n')


[docs]def dumps(
        obj: NNF,
        *,
        num_variables: t.Optional[int] = None,
        var_labels: t.Optional[t.Dict[Name, int]] = None,
        comment_header: t.Optional[str] = None,
        mode: str = 'sat'
) -> str:
    """Like :func:`dump`, but to a string instead of to a file."""
    buffer = io.StringIO()
    dump(obj, buffer, num_variables=num_variables, var_labels=var_labels,
         comment_header=comment_header, mode=mode)
    return buffer.getvalue()


[docs]def load(fp: t.TextIO) -> t.Union[NNF, And[Or[Var]]]:
    """Load a sentence from an open file.

    The format is automatically detected.
    """
    for line in fp:
        if line.startswith('c'):
            continue
        if line.startswith('p '):
            problem = line.split()
            if len(line) < 2:
                raise DecodeError("Malformed problem line")
            fmt = problem[1]
            if 'sat' in fmt or 'SAT' in fmt:
                # problem[2] contains the number of variables
                # but that's currently not explicitly represented
                return _load_sat(fp)
            elif 'cnf' in fmt or 'CNF' in fmt:
                # problem[2] has the number of variables
                # problem[3] has the number of clauses
                return _load_cnf(fp)
            else:
                raise DecodeError("Unknown format '{}'".format(fmt))
        elif line.startswith('nnf '):
            # Might be a DSHARP output file
            from nnf import dsharp
            fp.seek(0)
            return dsharp.load(fp)
        else:
            raise DecodeError(
                "Couldn't find a problem line before an unknown kind of line"
            )
    else:
        raise DecodeError(
            "Couldn't find a problem line before the end of the file"
        )


[docs]def loads(s: str) -> t.Union[NNF, And[Or[Var]]]:
    """Like :func:`load`, but from a string instead of from a file."""
    return load(io.StringIO(s))


def _load_sat(fp: t.TextIO) -> NNF:
    tokens = collections.deque()  # type: t.Deque[str]
    for line in fp:
        if line.startswith('c'):
            continue
        tokens.extend(
            line.replace('(', '( ')
                .replace(')', ' ) ')
                .replace('+(', ' +(')
                .replace('*(', ' *(')
                .replace('-', ' - ')
                .split()
        )
    result = _parse_sat(tokens)
    if tokens:
        warnings.warn("Found extra tokens past the end of the sentence")
    return result


def _parse_sat(tokens: 't.Deque[str]') -> NNF:
    cur = tokens.popleft()
    if cur == '(':
        content = _parse_sat(tokens)
        close = tokens.popleft()
        if close != ')':
            raise DecodeError(
                "Expected closing paren, found {!r}".format(close)
            )
        return content
    elif cur == '-':
        content = _parse_sat(tokens)
        if not isinstance(content, Var):
            raise DecodeError(
                "Only variables can be negated, not {!r}".format(content)
            )
        return ~content
    elif cur == '*(':
        children = []
        while tokens[0] != ')':
            children.append(_parse_sat(tokens))
        tokens.popleft()
        if children:
            return And(children)
        else:
            return true
    elif cur == '+(':
        children = []
        while tokens[0] != ')':
            children.append(_parse_sat(tokens))
        tokens.popleft()
        if children:
            return Or(children)
        else:
            return false
    else:
        return Var(_parse_int(cur))


def _load_cnf(fp: t.TextIO) -> And[Or[Var]]:
    tokens = []  # type: t.List[str]
    for line in fp:
        if line.startswith('c'):
            continue
        tokens.extend(
            line.replace('-', ' -')
                .split()
        )
    return _parse_cnf(tokens)


def _parse_cnf(tokens: t.Iterable[str]) -> And[Or[Var]]:
    clauses = set()  # type: t.Set[Or[Var]]
    clause = set()  # type: t.Set[Var]
    for token in tokens:
        if token == '0':
            clauses.add(Or(clause))
            clause = set()
        elif token == '%':
            # Some example files end with:
            # 0
            # %
            # 0
            # I don't know why.
            break
        elif token.startswith('-'):
            clause.add(Var(_parse_int(token[1:]), False))
        else:
            clause.add(Var(_parse_int(token)))
    if clause:
        # A file may or may not end with a 0
        # Adding an empty clause is not desirable
        clauses.add(Or(clause))
    sentence = And(clauses)
    NNF._is_CNF_loose.set(sentence, True)
    return sentence


def _parse_int(token: str) -> int:
    """Parse an integer, or raise an appropriate exception.

    Arguably a little too accepting, e.g. _parse_int("३") == 3
    (that's U+0969 DEVANAGARI DIGIT THREE)
    """
    try:
        return int(token)
    except ValueError:
        raise DecodeError(
            "Found unexpected token {!r}".format(token)
        ) from None
Source code for nnf.dimacs

python-nnf

Navigation

Related Topics