# Source code for gristmill.generate

"""Generate source code from optimized computations."""

import functools
import textwrap
import types
import typing

from drudge import TensorDef, Term, Range, prod_
from drudge.term import try_resolve_range
from sympy import (
    Expr, Mul, Pow, Integer, Rational, Add, Indexed, IndexedBase
)
from sympy.printing.ccode import CCodePrinter
from sympy.printing.fcode import FCodePrinter
from sympy.printing.printer import Printer
from sympy.printing.python import PythonPrinter

from .utils import create_jinja_env


class BasePrinter:
    """The base class for tensor printers.

    A base printer translates tensor definitions into plain namespaces (see
    :py:meth:`transl`) and renders them through templates looked up in the
    Jinja environment built by ``create_jinja_env``.
    """

    def __init__(
            self, scal_printer: Printer, indexed_proc_cb=lambda x: None,
            add_globals=None, add_filters=None, add_tests=None, add_templ=None
    ):
        """Initializes a base printer.

        Parameters
        ----------

        scal_printer
            The SymPy printer for scalar quantities.

        indexed_proc_cb
            It is going to be called with context nodes with ``base`` and
            ``indices`` (in both the root and for each indexed factors, as
            described in :py:meth:`transl`) to do additional processing.  For
            most tasks, :py:func:`mangle_base` can be helpful.

        add_globals, add_filters, add_tests, add_templ
            Passed to ``create_jinja_env`` when the template environment of
            this printer is created.

        """

        env = create_jinja_env(add_filters, add_globals, add_tests, add_templ)

        self._env = env
        self._scal_printer = scal_printer
        self._indexed_proc = indexed_proc_cb

    def transl(self, tensor_def: TensorDef) -> types.SimpleNamespace:
        """Translate tensor definition into context for template rendering.

        This function will translate the given tensor definition into a simple
        namespace that could be easily used as the context in the actual Jinja
        template rendering.

        The context contains fields,

        base
            A printed form for the base of the tensor definition.

        indices
            A list of external indices.  For each entry, keys ``index`` and
            ``range`` are present to give the printed form of the index and
            the range it is over.  For convenience, ``lower``, ``upper``, and
            ``size`` have the printed form of lower/upper bounds and the size
            of the range.  We also have ``lower_expr``, ``upper_expr``, and
            ``size_expr`` for the unprinted expression of them.

        terms
            A list of terms for the tensor, with each entry being a simple
            namespace with keys,

            sums
                A list of summations in the tensor term.  Its entries are in
                the same format as the external indices for tensors.

            phase
                ``+`` sign or ``-`` sign.  For the phase of the term.

            numerator
                The printed form of the numerator of the coefficient of the
                term.  It can be a simple ``1`` string.

            denominator
                The printed form of the denominator.

            indexed_factors
                The indexed factors of the term.  Each is given as a simple
                namespace with key ``base`` for the printed form of the base,
                and a key ``indices`` giving the indices to the key, in the
                same format as the ``indices`` field of the base context.

            other_factors
                Factors which are not simple indexed quantity, given as a
                list of the printed form directly.

        The actual content of the context can also be customized by overriding
        the :py:meth:`proc_ctx` in subclasses.

        """

        ctx = types.SimpleNamespace()

        base = tensor_def.base
        ctx.base = self._print_scal(
            base.label if isinstance(base, IndexedBase) else base
        )
        ctx.indices = self._form_indices_ctx(tensor_def.exts)

        # Ranges of the external indices, shared by all terms and never
        # mutated, so that one term cannot disturb the resolution in another.
        ext_ranges = dict(tensor_def.exts)
        resolvers = tensor_def.rhs.drudge.resolvers.value

        terms = []
        ctx.terms = terms

        # Render each term in turn.
        for term in tensor_def.rhs_terms:
            term_ctx = types.SimpleNamespace()
            terms.append(term_ctx)

            # Index scope for this term only: externals overlaid with the
            # summation dummies.  Building it afresh keeps an external index
            # intact even when a dummy happens to shadow it.
            scope = dict(ext_ranges)
            scope.update(term.sums)

            term_ctx.sums = self._form_indices_ctx(term.sums)

            factors, coeff = term.amp_factors
            phase, numerator, denominator = self._split_coeff(coeff)

            term_ctx.phase = '+' if phase == 1 else '-'

            # Sums always need parentheses; in the denominator products need
            # them as well since they follow a division sign.
            for parts, attr, paren_types in (
                    (numerator, 'numerator', Add),
                    (denominator, 'denominator', (Add, Mul)),
            ):
                val = prod_(parts)
                printed_val = self._print_scal(val)
                if isinstance(val, paren_types):
                    printed_val = '(' + printed_val + ')'
                setattr(term_ctx, attr, printed_val)

            indexed_factors = []
            term_ctx.indexed_factors = indexed_factors
            other_factors = []
            term_ctx.other_factors = other_factors
            for factor in factors:
                if isinstance(factor, Indexed):
                    factor_ctx = types.SimpleNamespace()
                    factor_ctx.base = self._print_scal(factor.base.label)
                    # Indices of factors can be general expressions whose
                    # range cannot be resolved, hence no enforcement here.
                    factor_ctx.indices = self._form_indices_ctx((
                        (i, try_resolve_range(i, scope, resolvers))
                        for i in factor.indices
                    ), enforce=False)
                    indexed_factors.append(factor_ctx)
                else:
                    other_factors.append(self._print_scal(factor))

            self.proc_ctx(tensor_def, term, ctx, term_ctx)

        self.proc_ctx(tensor_def, None, ctx, None)

        return ctx

    @staticmethod
    def _split_coeff(coeff):
        """Split a coefficient into phase, numerator and denominator factors.

        The coefficient is first put together into a single fraction.  The
        result is a triple of the phase (``1`` or ``-1``), the list of factors
        of the numerator, and the list of factors of the denominator.  Unit
        factors are dropped and signs of explicit numbers go into the phase.
        """

        coeff = coeff.together()
        coeff_factors = coeff.args if isinstance(coeff, Mul) else (coeff,)

        phase = 1
        numerator = []
        denominator = []
        for factor in coeff_factors:
            # Integer has to be tested first, since it is also a Rational.
            if isinstance(factor, Integer):
                if factor.is_negative:
                    phase *= -1
                    factor = -factor
                if factor != 1:
                    numerator.append(factor)
            elif isinstance(factor, Rational):
                for part, dest in (
                        (factor.p, numerator), (factor.q, denominator)
                ):
                    if part < 0:
                        phase *= -1
                        part = -part
                    if part != 1:
                        dest.append(part)
            elif isinstance(factor, Pow) and factor.args[1].is_negative:
                # Negative powers are moved below the division line.
                denominator.append(1 / factor)
            else:
                numerator.append(factor)

        return phase, numerator, denominator

    def proc_ctx(
            self, tensor_def: TensorDef, term: typing.Optional[Term],
            tensor_entry: types.SimpleNamespace,
            term_entry: typing.Optional[types.SimpleNamespace]
    ):
        """Make additional processing of the rendering context.

        This method can be overridden to make additional processing on the
        rendering context described in :py:meth:`transl` to perform additional
        customization or to make more information available.

        It will be called for each of the terms during the processing.  And
        finally it will be called again with the term given as None for a
        final processing.

        By default, the indexed quantities nodes are processed by the
        user-given call-back.
        """

        if term is None:
            self._indexed_proc(tensor_entry)
        else:
            for i in term_entry.indexed_factors:
                self._indexed_proc(i)

    def render(self, templ_name: str, ctx: types.SimpleNamespace) -> str:
        """Render the given context for the given template.

        Meaningful subclass methods can call this function for actual
        functionality.  The attributes of the context namespace are given to
        the template as its variables.
        """

        templ = self._env.get_template(templ_name)
        return templ.render(ctx.__dict__)

    def _form_indices_ctx(
            self, pairs: typing.Iterable[typing.Tuple[Expr, Range]],
            enforce=True
    ):
        """Form indices context.

        For each pair of an index and its range, a namespace with the fields
        described for ``indices`` in :py:meth:`transl` is created.  When the
        range is missing or not bounded, a ``ValueError`` is raised if
        ``enforce`` is set, otherwise all fields about the bounds are set to
        None.
        """

        res = []
        for index, range_ in pairs:
            if range_ is not None and range_.bounded:
                lower_expr = range_.lower
                upper_expr = range_.upper
                size_expr = range_.size
                lower = self._print_scal(lower_expr)
                upper = self._print_scal(upper_expr)
                size = self._print_scal(size_expr)
            elif enforce:
                raise ValueError(
                    'Invalid range to print', range_, 'for', index,
                    'expecting a bounded range.'
                )
            else:
                lower_expr = upper_expr = size_expr = None
                lower = upper = size = None

            res.append(types.SimpleNamespace(
                index=self._print_scal(index), range=range_,
                lower=lower, upper=upper, size=size,
                lower_expr=lower_expr, upper_expr=upper_expr,
                size_expr=size_expr
            ))

        return res

    def _print_scal(self, expr: Expr):
        """Print a scalar with the scalar printer given at initialization."""
        return self._scal_printer.doprint(expr)
def mangle_base(func):
    """Turn a base-name mangling function into an indexed-node call-back.

    The given function is called with the printed string of an indexed base
    and the list of its indices, as described in
    :py:meth:`BasePrinter.transl`, and should return the new mangled name for
    the base.  The result is a call-back compatible with the
    ``indexed_proc_cb`` argument of the :py:meth:`BasePrinter.__init__`
    constructor, which replaces the ``base`` of the nodes given to it in
    place.

    This function can also be used as a function decorator.
    """

    @functools.wraps(func)
    def _rename_node_base(node):
        """Replace the base name of the node by its mangled form."""
        mangled = func(node.base, node.indices)
        node.base = mangled

    return _rename_node_base
#
# The imperative code printers
# ----------------------------
#
class ImperativeCodePrinter(BasePrinter):
    """Printer for automatic generation of naive imperative code.

    This printer supports the printing of the evaluation of tensor expressions
    by simple loops and arithmetic operations.

    This is mostly a base class that is going to be subclassed for different
    languages.  For each language, mostly just the options for the language
    could be given in the super initializer.  Most important ones are the
    printer for the scalar expressions and the formatter of loops, as well as
    some definition of literals and operators.

    """

    def __init__(self, scal_printer: Printer, print_indexed_cb,
                 global_indent=1, indent_size=4, max_width=80,
                 line_cont='', breakable_regex=r'(\s*[+-]\s*)', stmt_end='',
                 add_globals=None, add_filters=None,
                 add_tests=None, add_templ=None, **kwargs):
        """
        Initialize the automatic code printer.

        scal_printer
            A sympy printer used for the printing of scalar expressions.

        print_indexed_cb
            It will be called with the printed base, and the list of indices
            (as described in :py:meth:`BasePrinter.transl`) to return the
            string for the printed form.  This will be called after the given
            processing of indexed nodes.

        global_indent
            The base global indentation of the generated code.

        indent_size
            The size of the indentation.

        max_width
            The maximum width for each line.

        line_cont
            The string used for indicating line continuation.

        breakable_regex
            The regular expression used to break long expressions.

        stmt_end
            The ending of the statements.

        add_globals
            Additional template globals.  They are applied on top of the
            globals created from the options above, so entries here take
            precedence on a name clash.

        add_filters, add_tests, add_templ
            Forwarded unchanged to the base class.

        All options to the base class :py:class:`BasePrinter` are also
        supported.

        """

        # Some globals for template rendering.
        default_globals = {
            'global_indent': global_indent,
            'indent_size': indent_size,
            'max_width': max_width,
            'line_cont': line_cont,
            'breakable_regex': breakable_regex,
            'stmt_end': stmt_end,
        }
        if add_globals is not None:
            default_globals.update(add_globals)

        # Initialize the base class.
        super().__init__(
            scal_printer,
            add_globals=default_globals,
            add_filters=add_filters, add_tests=add_tests, add_templ=add_templ,
            **kwargs
        )

        self._print_indexed = print_indexed_cb

    def proc_ctx(
            self, tensor_def: TensorDef, term: typing.Optional[Term],
            tensor_entry: types.SimpleNamespace,
            term_entry: typing.Optional[types.SimpleNamespace]
    ):
        """Process the context.

        The indexed nodes will be printed by user-given printer and given to
        ``indexed`` attributes of the same node.  Also the term contexts will
        be given an attribute named ``amp`` for the whole amplitude part put
        together.  The division by the denominator is only written when the
        denominator is not trivially ``1``, and an amplitude without any
        factor is written as ``1``.
        """

        # This does the processing of the indexed nodes.
        super().proc_ctx(tensor_def, term, tensor_entry, term_entry)

        if term is None:
            tensor_entry.indexed = self._print_indexed(
                tensor_entry.base, tensor_entry.indices
            )
            return

        factors = []
        if term_entry.numerator != '1':
            factors.append(term_entry.numerator)

        for i in term_entry.indexed_factors:
            i.indexed = self._print_indexed(i.base, i.indices)
            factors.append(i.indexed)

        factors.extend(term_entry.other_factors)

        # A term can be a bare unit (or a bare reciprocal); an empty product
        # would give an empty string, which is not a valid expression.
        if not factors:
            factors.append('1')

        parts = [' * '.join(factors)]
        # The denominator is in its printed form, so it has to be compared
        # with the string rather than the number.
        if term_entry.denominator != '1':
            parts.extend(['/', term_entry.denominator])
        term_entry.amp = ' '.join(parts)

    def print_eval(self, ctx: types.SimpleNamespace):
        """Print the evaluation of a tensor definition.

        The context should come from :py:meth:`BasePrinter.transl`; it is
        rendered by the ``imperative`` template.
        """
        return self.render('imperative', ctx)
class CPrinter(ImperativeCodePrinter):
    """C code printer.

    In this class, just some parameters for C programming language is fixed
    relative to the base :py:class:`ImperativeCodePrinter`.
    """

    def __init__(self, **kwargs):
        """Initialize a C code printer.

        The scalar printer, the printing of indexed quantities (one pair of
        brackets for each index), the line continuation symbol, the statement
        ending, the loop filters, and the zero literal will be set
        automatically.  All other keyword arguments are forwarded to
        :py:class:`ImperativeCodePrinter`.
        """

        super().__init__(
            CCodePrinter(),
            lambda base, indices: ''.join([base] + [
                '[{}]'.format(i.index) for i in indices
            ]),
            line_cont='\\', stmt_end=';',
            add_filters={
                'form_loop_beg': _form_c_loop_beg,
                'form_loop_end': _form_c_loop_end,
            }, add_globals={
                'zero_literal': '0.0'
            }, **kwargs
        )


#
# Some filters for C programming language
#


def _form_c_loop_beg(ctx):
    """Form the loop beginning for C.

    The context is an index entry with the printed ``index``, ``lower``, and
    ``upper``.  The loop runs from the lower bound up to, but excluding, the
    upper bound.
    """
    # The three clauses of a C ``for`` header are separated by semicolons.
    return 'for({index}={lower}; {index}<{upper}; {index}++)'.format(
        index=ctx.index, lower=ctx.lower, upper=ctx.upper
    ) + ' {'


def _form_c_loop_end(_):
    """Form the loop ending for C."""
    return '}'
class FortranPrinter(ImperativeCodePrinter):
    """Fortran code printer.

    Relative to the base :py:class:`ImperativeCodePrinter`, this class only
    fixes the parameters needed for the *new* (free-form) Fortran programming
    language.
    """

    def __init__(self, openmp=True, **kwargs):
        """Initialize a Fortran code printer.

        The scalar printer, the printing of indexed quantities, the loop
        filters, the zero literal, and the line continuation symbol are set
        automatically.  When ``openmp`` is set, the OpenMP prelude/finale
        templates are additionally registered.  All other keyword arguments
        are forwarded to the base class.
        """

        omp_templates = None
        if openmp:
            omp_templates = {
                'tensor_prelude': _FORTRAN_OMP_PARALLEL_PRELUDE,
                'tensor_finale': _FORTRAN_OMP_PARALLEL_FINALE,
                'init_prelude': _FORTRAN_OMP_INIT_PRELUDE,
                'init_finale': _FORTRAN_OMP_INIT_FINALE,
                'term_prelude': _FORTRAN_OMP_TERM_PRELUDE,
                'term_finale': _FORTRAN_OMP_TERM_FINALE,
            }

        def print_indexed(base, indices):
            """Print an indexed quantity, scalars without parentheses."""
            if len(indices) == 0:
                return base
            subscripts = ', '.join(entry.index for entry in indices)
            return base + '({})'.format(subscripts)

        loop_filters = {
            'form_loop_beg': self._form_fortran_loop_beg,
            'form_loop_end': self._form_fortran_loop_end,
        }

        super().__init__(
            FCodePrinter(settings={'source_format': 'free'}),
            print_indexed,
            line_cont='&',
            add_filters=loop_filters,
            add_globals={'zero_literal': '0.0'},
            add_templ=omp_templates,
            **kwargs
        )

    def print_decl_eval(
            self, tensor_defs: typing.Iterable[TensorDef],
            decl_type='real', explicit_bounds=False
    ) -> typing.Tuple[typing.List[str], typing.List[str]]:
        """Print Fortran declarations and evaluations of tensor definitions.

        Parameters
        ----------

        tensor_defs
            The tensor definitions to print.

        decl_type
            The type to be declared for the tensors.

        explicit_bounds
            If the lower and upper bounds should be written explicitly in the
            declaration.

        Return
        ------

        decls
            The list of declaration strings.

        evals
            The list of evaluation strings.

        """

        declarations = []
        evaluations = []
        for definition in tensor_defs:
            # Translate once and use the context for both printings.
            context = self.transl(definition)
            declarations.append(
                self.print_decl(context, decl_type, explicit_bounds)
            )
            evaluations.append(self.print_eval(context))

        return declarations, evaluations

    def print_decl(self, ctx, decl_type, explicit_bounds):
        """Print the Fortran declaration of the LHS of a tensor definition.

        A string will be returned that forms the naive declaration of the
        given tensor as a local variable, indented by the global indentation
        of the printer.  Each dimension is given either as explicit
        ``lower:upper`` bounds or just as its size.
        """

        if len(ctx.indices) > 0:
            if explicit_bounds:
                dims = [
                    ':'.join((self._print_lower(entry.lower_expr), entry.upper))
                    for entry in ctx.indices
                ]
            else:
                dims = [entry.size for entry in ctx.indices]
            sizes_decl = ', dimension({})'.format(', '.join(dims))
        else:
            sizes_decl = ''

        env_globals = self._env.globals
        n_spaces = int(env_globals['global_indent']) * int(
            env_globals['indent_size']
        )

        return ' ' * n_spaces + decl_type + sizes_decl + ' :: ' + ctx.base

    def _print_lower(self, lower: Expr):
        """Print the lower bound based on the Fortran convention.

        The given lower bound is shifted up by one before being printed.
        """
        shifted = lower + Integer(1)
        return self._print_scal(shifted)

    def _form_fortran_loop_beg(self, ctx):
        """Form the loop beginning for Fortran."""
        return 'do {index}={lower}, {upper}'.format(
            index=ctx.index,
            lower=self._print_lower(ctx.lower_expr),
            upper=ctx.upper
        )

    @staticmethod
    def _form_fortran_loop_end(_):
        """Form the loop ending for Fortran."""
        return 'end do'
# OpenMP templates for the Fortran printer.  Each template is written with one
# directive or Jinja tag per line, with the line breaks given explicitly.

# Wrapping of the evaluation of a whole tensor in a parallel region.
_FORTRAN_OMP_PARALLEL_PRELUDE = "!$omp parallel default(shared)\n"

_FORTRAN_OMP_PARALLEL_FINALE = "!$omp end parallel\n"

# Initialization of the result: work-shared over the external indices when
# there are any, otherwise done by a single thread.
_FORTRAN_OMP_INIT_PRELUDE = (
    "{% if n_ext > 0 %}\n"
    "!$omp do schedule(static)\n"
    "{% else %}\n"
    "!$omp single\n"
    "{% endif %}\n"
)

_FORTRAN_OMP_INIT_FINALE = (
    "{% if n_ext > 0 %}\n"
    "!$omp end do\n"
    "{% else %}\n"
    "!$omp end single\n"
    "{% endif %}\n"
)

# Evaluation of a term: work-shared over the external indices when there are
# any; for scalar results a reduction over the summations when there are any,
# otherwise done by a single thread.
_FORTRAN_OMP_TERM_PRELUDE = (
    "{% if n_ext > 0 %}\n"
    "!$omp do schedule(static)\n"
    "{% else %}\n"
    "{% if (term.sums | length) > 0 %}\n"
    "!$omp do schedule(static) reduction(+:{{ lhs }})\n"
    "{% else %}\n"
    "!$omp single\n"
    "{% endif %}\n"
    "{% endif %}\n"
)

_FORTRAN_OMP_TERM_FINALE = (
    "{% if (n_ext + (term.sums | length)) > 0 %}\n"
    "!$omp end do\n"
    "{% else %}\n"
    "!$omp end single\n"
    "{% endif %}\n"
)


#
# Einsum printer
# --------------
#
class EinsumPrinter(BasePrinter):
    """Printer for the einsum function.

    For tensors that are classical tensor contractions, this printer generates
    code based on the NumPy ``einsum`` function.  For contractions supported,
    the code from this printer can also be used for Tensorflow.
    """

    def __init__(self, **kwargs):
        """Initialize the printer.

        A SymPy Python printer is used for the scalars.  All keyword arguments
        are forwarded to the base class :py:class:`BasePrinter`.
        """
        scal_printer = PythonPrinter()
        super().__init__(scal_printer, **kwargs)

    def print_eval(
            self, tensor_defs: typing.Iterable[TensorDef], base_indent=4
    ) -> str:
        """Print the evaluation of the tensor definitions.

        Parameters
        ----------

        tensor_defs
            The tensor definitions for the evaluations.

        base_indent
            The base indent of the generated code.

        Return
        ------

        The code for evaluations.

        A ``ValueError`` is raised when any term has factors other than
        indexed quantities, since these cannot be given to einsum.

        """

        # All definitions are translated and checked before any rendering.
        contexts = []
        for definition in tensor_defs:
            context = self.transl(definition)
            offending = next((
                term_ctx.other_factors for term_ctx in context.terms
                if len(term_ctx.other_factors) > 0
            ), None)
            if offending is not None:
                raise ValueError(
                    'Factors unable to be handled by einsum', offending
                )
            contexts.append(context)

        rendered = [self.render('einsum', context) for context in contexts]
        return textwrap.indent('\n'.join(rendered), ' ' * base_indent)