summaryrefslogtreecommitdiff
path: root/.local/bin/latexrun
diff options
context:
space:
mode:
author0scar <qgt268@alumni.ku.dk>2021-06-10 08:05:06 +0000
committer0scar <qgt268@alumni.ku.dk>2021-06-10 08:05:06 +0000
commit7ad685aa6bd77d76115132280ce323d7fc5aeeb0 (patch)
treec605992dfb5582a050b8847eb624fa84f1f97d91 /.local/bin/latexrun
parent6e4a15d79352ca2707a3b0d81d158228a7d14704 (diff)
Add scripts
Diffstat (limited to '.local/bin/latexrun')
-rwxr-xr-x.local/bin/latexrun1936
1 files changed, 1936 insertions, 0 deletions
diff --git a/.local/bin/latexrun b/.local/bin/latexrun
new file mode 100755
index 0000000..72f741d
--- /dev/null
+++ b/.local/bin/latexrun
@@ -0,0 +1,1936 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2013, 2014 Austin Clements
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import sys
+import os
+import errno
+import argparse
+import shlex
+import json
+import subprocess
+import re
+import collections
+import hashlib
+import shutil
+import curses
+import filecmp
+import io
+import traceback
+import time
+
+try:
+ import fcntl
+except ImportError:
+ # Non-UNIX platform
+ fcntl = None
+
+def debug(string, *args):
+ if debug.enabled:
+ print(string.format(*args), file=sys.stderr)
+debug.enabled = False
+
+def debug_exc():
+ if debug.enabled:
+ traceback.print_exc()
+
+def main():
+ # Parse command-line
+ arg_parser = argparse.ArgumentParser(
+ description='''A 21st century LaTeX wrapper,
+ %(prog)s runs latex (and bibtex) the right number of times so you
+ don't have to,
+ strips the log spew to make errors visible,
+ and plays well with standard build tools.''')
+ arg_parser.add_argument(
+ '-o', metavar='FILE', dest='output', default=None,
+ help='Output file name (default: derived from input file)')
+ arg_parser.add_argument(
+ '--latex-cmd', metavar='CMD', default='pdflatex',
+ help='Latex command (default: %(default)s)')
+ arg_parser.add_argument(
+ '--latex-args', metavar='ARGS', type=arg_parser_shlex,
+ help='Additional command-line arguments for latex.'
+ ' This will be parsed and split using POSIX shell rules.')
+ arg_parser.add_argument(
+ '--bibtex-cmd', metavar='CMD', default='bibtex',
+ help='Bibtex command (default: %(default)s)')
+ arg_parser.add_argument(
+ '--bibtex-args', metavar='ARGS', type=arg_parser_shlex,
+ help='Additional command-line arguments for bibtex')
+ arg_parser.add_argument(
+ '--max-iterations', metavar='N', type=int, default=10,
+ help='Max number of times to run latex before giving up'
+ ' (default: %(default)s)')
+ arg_parser.add_argument(
+ '-W', metavar='(no-)CLASS',
+ action=ArgParserWarnAction, dest='nowarns', default=set(['underfull']),
+ help='Enable/disable warning from CLASS, which can be any package name, '
+ 'LaTeX warning class (e.g., font), bad box type '
+ '(underfull, overfull, loose, tight), or "all"')
+ arg_parser.add_argument(
+ '-O', metavar='DIR', dest='obj_dir', default='latex.out',
+ help='Directory for intermediate files and control database '
+ '(default: %(default)s)')
+ arg_parser.add_argument(
+ '--color', choices=('auto', 'always', 'never'), default='auto',
+ help='When to colorize messages')
+ arg_parser.add_argument(
+ '--verbose-cmds', action='store_true', default=False,
+ help='Print commands as they are executed')
+ arg_parser.add_argument(
+ '--debug', action='store_true',
+ help='Enable detailed debug output')
+ actions = arg_parser.add_argument_group('actions')
+ actions.add_argument(
+ '--clean-all', action='store_true', help='Delete output files')
+ actions.add_argument(
+ 'file', nargs='?', help='.tex file to compile')
+ args = arg_parser.parse_args()
+ if not any([args.clean_all, args.file]):
+ arg_parser.error('at least one action is required')
+ args.latex_args = args.latex_args or []
+ args.bibtex_args = args.bibtex_args or []
+
+ verbose_cmd.enabled = args.verbose_cmds
+ debug.enabled = args.debug
+
+ # A note about encodings: POSIX encoding is a mess; TeX encoding
+ # is a disaster. Our goal is to make things no worse, so we want
+ # byte-accurate round-tripping of TeX messages. Since TeX
+ # messages are *basically* text, we use strings and
+ # surrogateescape'ing for both input and output. I'm not fond of
+ # setting surrogateescape globally, but it's far easier than
+ # dealing with every place we pass TeX output through.
+ # Conveniently, JSON can round-trip surrogateescape'd strings, so
+ # our control database doesn't need special handling.
+ sys.stdout = io.TextIOWrapper(
+ sys.stdout.buffer, encoding=sys.stdout.encoding,
+ errors='surrogateescape', line_buffering=sys.stdout.line_buffering)
+ sys.stderr = io.TextIOWrapper(
+ sys.stderr.buffer, encoding=sys.stderr.encoding,
+ errors='surrogateescape', line_buffering=sys.stderr.line_buffering)
+
+ Message.setup_color(args.color)
+
+ # Open control database.
+ dbpath = os.path.join(args.obj_dir, '.latexrun.db')
+ if not os.path.exists(dbpath) and os.path.exists('.latexrun.db'):
+ # The control database used to live in the source directory.
+ # Support this for backwards compatibility.
+ dbpath = '.latexrun.db'
+ try:
+ db = DB(dbpath)
+ except (ValueError, OSError) as e:
+ print('error opening {}: {}'.format(e.filename if hasattr(e, 'filename')
+ else dbpath, e),
+ file=sys.stderr)
+ debug_exc()
+ sys.exit(1)
+
+ # Clean
+ if args.clean_all:
+ try:
+ db.do_clean(args.obj_dir)
+ except OSError as e:
+ print(e, file=sys.stderr)
+ debug_exc()
+ sys.exit(1)
+
+ # Build
+ if not args.file:
+ return
+ task_commit = None
+ try:
+ task_latex = LaTeX(db, args.file, args.latex_cmd, args.latex_args,
+ args.obj_dir, args.nowarns)
+ task_commit = LaTeXCommit(db, task_latex, args.output)
+ task_bibtex = BibTeX(db, task_latex, args.bibtex_cmd, args.bibtex_args,
+ args.nowarns, args.obj_dir)
+ tasks = [task_latex, task_commit, task_bibtex]
+ stable = run_tasks(tasks, args.max_iterations)
+
+ # Print final task output and gather exit status
+ status = 0
+ for task in tasks:
+ status = max(task.report(), status)
+
+ if not stable:
+ print('error: files are still changing after {} iterations; giving up'
+ .format(args.max_iterations), file=sys.stderr)
+ status = max(status, 1)
+ except TaskError as e:
+ print(str(e), file=sys.stderr)
+ debug_exc()
+ status = 1
+
+ # Report final status, if interesting
+ fstatus = 'There were errors' if task_commit is None else task_commit.status
+ if fstatus:
+ output = args.output
+ if output is None:
+ if task_latex.get_outname() is not None:
+ output = os.path.basename(task_latex.get_outname())
+ else:
+ output = 'output'
+ if Message._color:
+ terminfo.send('bold', ('setaf', 1))
+ print('{}; {} not updated'.format(fstatus, output))
+ if Message._color:
+ terminfo.send('sgr0')
+ sys.exit(status)
+
+def arg_parser_shlex(string):
+ """Argument parser for shell token lists."""
+ try:
+ return shlex.split(string)
+ except ValueError as e:
+ raise argparse.ArgumentTypeError(str(e)) from None
+
+class ArgParserWarnAction(argparse.Action):
+ def __call__(self, parser, namespace, value, option_string=None):
+ nowarn = getattr(namespace, self.dest)
+ if value == 'all':
+ nowarn.clear()
+ elif value.startswith('no-'):
+ nowarn.add(value[3:])
+ else:
+ nowarn.discard(value)
+ setattr(namespace, self.dest, nowarn)
+
+def verbose_cmd(args, cwd=None, env=None):
+ if verbose_cmd.enabled:
+ cmd = ' '.join(map(shlex.quote, args))
+ if cwd is not None:
+ cmd = '(cd {} && {})'.format(shlex.quote(cwd), cmd)
+ if env is not None:
+ for k, v in env.items():
+ if os.environ.get(k) != v:
+ cmd = '{}={} {}'.format(k, shlex.quote(v), cmd)
+ print(cmd, file=sys.stderr)
+verbose_cmd.enabled = False
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST and os.path.isdir(path):
+ pass
+ else: raise
+
+class DB:
+ """A latexrun control database."""
+
+ _VERSION = 'latexrun-db-v2'
+
+ def __init__(self, filename):
+ self.__filename = filename
+
+ # Make sure database directory exists
+ if os.path.dirname(self.__filename):
+ os.makedirs(os.path.dirname(self.__filename), exist_ok=True)
+
+ # Lock the database if possible. We don't release this lock
+ # until the process exits.
+ lockpath = self.__filename + '.lock'
+ if fcntl is not None:
+ lockfd = os.open(lockpath, os.O_CREAT|os.O_WRONLY, 0o666)
+ # Note that this is actually an fcntl lock, not a lockf
+ # lock. Don't be fooled.
+ fcntl.lockf(lockfd, fcntl.LOCK_EX, 1)
+
+ try:
+ fp = open(filename, 'r')
+ except FileNotFoundError:
+ debug('creating new database')
+ self.__val = {'version': DB._VERSION}
+ else:
+ debug('loading database')
+ self.__val = json.load(fp)
+ if 'version' not in self.__val:
+ raise ValueError('file exists, but does not appear to be a latexrun database'.format(filename))
+ if self.__val['version'] != DB._VERSION:
+ raise ValueError('unknown database version {!r}'
+ .format(self.__val['version']))
+
+ def commit(self):
+ debug('committing database')
+ # Atomically commit database
+ tmp_filename = self.__filename + '.tmp'
+ with open(tmp_filename, 'w') as fp:
+ json.dump(self.__val, fp, indent=2, separators=(',', ': '))
+ fp.flush()
+ os.fsync(fp.fileno())
+ os.rename(tmp_filename, self.__filename)
+
+ def get_summary(self, task_id):
+ """Return the recorded summary for the given task or None."""
+ return self.__val.get('tasks', {}).get(task_id)
+
+ def set_summary(self, task_id, summary):
+ """Set the summary for the given task."""
+ self.__val.setdefault('tasks', {})[task_id] = summary
+
+ def add_clean(self, filename):
+ """Add an output file to be cleaned.
+
+ Unlike the output files recorded in the task summaries,
+ cleanable files strictly accumulate until a clean is
+ performed.
+ """
+ self.__val.setdefault('clean', {})[filename] = hash_cache.get(filename)
+
+ def do_clean(self, obj_dir=None):
+ """Remove output files and delete database.
+
+ If obj_dir is not None and it is empty after all files are
+ removed, it will also be removed.
+ """
+
+ for f, want_hash in self.__val.get('clean', {}).items():
+ have_hash = hash_cache.get(f)
+ if have_hash is not None:
+ if want_hash == have_hash:
+ debug('unlinking {}', f)
+ hash_cache.invalidate(f)
+ os.unlink(f)
+ else:
+ print('warning: {} has changed; not removing'.format(f),
+ file=sys.stderr)
+ self.__val = {'version': DB._VERSION}
+ try:
+ os.unlink(self.__filename)
+ except FileNotFoundError:
+ pass
+ if obj_dir is not None:
+ try:
+ os.rmdir(obj_dir)
+ except OSError:
+ pass
+
+class HashCache:
+ """Cache of file hashes.
+
+ As latexrun reaches fixed-point, it hashes the same files over and
+ over, many of which never change. Since hashing is somewhat
+ expensive, we keep a simple cache of these hashes.
+ """
+
+ def __init__(self):
+ self.__cache = {}
+
+ def get(self, filename):
+ """Return the hash of filename, or * if it was clobbered."""
+ try:
+ with open(filename, 'rb') as fp:
+ st = os.fstat(fp.fileno())
+ key = (st.st_dev, st.st_ino)
+ if key in self.__cache:
+ return self.__cache[key]
+
+ debug('hashing {}', filename)
+ h = hashlib.sha256()
+ while True:
+ block = fp.read(256*1024)
+ if not len(block):
+ break
+ h.update(block)
+ self.__cache[key] = h.hexdigest()
+ return self.__cache[key]
+ except (FileNotFoundError, IsADirectoryError):
+ return None
+
+ def clobber(self, filename):
+ """If filename's hash is not known, record an invalid hash.
+
+ This can be used when filename was overwritten before we were
+ necessarily able to obtain its hash. filename must exist.
+ """
+ st = os.stat(filename)
+ key = (st.st_dev, st.st_ino)
+ if key not in self.__cache:
+ self.__cache[key] = '*'
+
+ def invalidate(self, filename):
+ try:
+ st = os.stat(filename)
+ except OSError as e:
+ # Pessimistically wipe the whole cache
+ debug('wiping hash cache ({})', e)
+ self.__cache.clear()
+ else:
+ key = (st.st_dev, st.st_ino)
+ if key in self.__cache:
+ del self.__cache[key]
+hash_cache = HashCache()
+
+class _Terminfo:
+ def __init__(self):
+ self.__tty = os.isatty(sys.stdout.fileno())
+ if self.__tty:
+ curses.setupterm()
+ self.__ti = {}
+
+ def __ensure(self, cap):
+ if cap not in self.__ti:
+ if not self.__tty:
+ string = None
+ else:
+ string = curses.tigetstr(cap)
+ if string is None or b'$<' in string:
+ # Don't have this capability or it has a pause
+ string = None
+ self.__ti[cap] = string
+ return self.__ti[cap]
+
+ def has(self, *caps):
+ return all(self.__ensure(cap) is not None for cap in caps)
+
+ def send(self, *caps):
+ # Flush TextIOWrapper to the binary IO buffer
+ sys.stdout.flush()
+ for cap in caps:
+ # We should use curses.putp here, but it's broken in
+ # Python3 because it writes directly to C's buffered
+ # stdout and there's no way to flush that.
+ if isinstance(cap, tuple):
+ s = curses.tparm(self.__ensure(cap[0]), *cap[1:])
+ else:
+ s = self.__ensure(cap)
+ sys.stdout.buffer.write(s)
+terminfo = _Terminfo()
+
+class Progress:
+ _enabled = None
+
+ def __init__(self, prefix):
+ self.__prefix = prefix
+ if Progress._enabled is None:
+ Progress._enabled = (not debug.enabled) and \
+ terminfo.has('cr', 'el', 'rmam', 'smam')
+
+ def __enter__(self):
+ self.last = ''
+ self.update('')
+ return self
+
+ def __exit__(self, typ, value, traceback):
+ if Progress._enabled:
+ # Beginning of line and clear
+ terminfo.send('cr', 'el')
+ sys.stdout.flush()
+
+ def update(self, msg):
+ if not Progress._enabled:
+ return
+ out = '[' + self.__prefix + ']'
+ if msg:
+ out += ' ' + msg
+ if out != self.last:
+ # Beginning of line, clear line, disable wrap
+ terminfo.send('cr', 'el', 'rmam')
+ sys.stdout.write(out)
+ # Enable wrap
+ terminfo.send('smam')
+ self.last = out
+ sys.stdout.flush()
+
+class Message(collections.namedtuple(
+ 'Message', 'typ filename lineno msg')):
+ def emit(self):
+ if self.filename:
+ if self.filename.startswith('./'):
+ finfo = self.filename[2:]
+ else:
+ finfo = self.filename
+ else:
+ finfo = '<no file>'
+ if self.lineno is not None:
+ finfo += ':' + str(self.lineno)
+ finfo += ': '
+ if self._color:
+ terminfo.send('bold')
+ sys.stdout.write(finfo)
+
+ if self.typ != 'info':
+ if self._color:
+ terminfo.send(('setaf', 5 if self.typ == 'warning' else 1))
+ sys.stdout.write(self.typ + ': ')
+ if self._color:
+ terminfo.send('sgr0')
+ sys.stdout.write(self.msg + '\n')
+
+ @classmethod
+ def setup_color(cls, state):
+ if state == 'never':
+ cls._color = False
+ elif state == 'always':
+ cls._color = True
+ elif state == 'auto':
+ cls._color = terminfo.has('setaf', 'bold', 'sgr0')
+ else:
+ raise ValueError('Illegal color state {:r}'.format(state))
+
+
+##################################################################
+# Task framework
+#
+
+terminate_task_loop = False
+start_time = time.time()
+
+def run_tasks(tasks, max_iterations):
+ """Execute tasks in round-robin order until all are stable.
+
+ This will also exit if terminate_task_loop is true. Tasks may use
+ this to terminate after a fatal error (even if that fatal error
+ doesn't necessarily indicate stability; as long as re-running the
+ task will never eliminate the fatal error).
+
+ Return True if fixed-point is reached or terminate_task_loop is
+ set within max_iterations iterations.
+ """
+
+ global terminate_task_loop
+ terminate_task_loop = False
+
+ nstable = 0
+ for iteration in range(max_iterations):
+ for task in tasks:
+ if task.stable():
+ nstable += 1
+ if nstable == len(tasks):
+ debug('fixed-point reached')
+ return True
+ else:
+ task.run()
+ nstable = 0
+ if terminate_task_loop:
+ debug('terminate_task_loop set')
+ return True
+ debug('fixed-point not reached')
+ return False
+
+class TaskError(Exception):
+ pass
+
+class Task:
+ """A deterministic computation whose inputs and outputs can be captured."""
+
+ def __init__(self, db, task_id):
+ self.__db = db
+ self.__task_id = task_id
+
+ def __debug(self, string, *args):
+ if debug.enabled:
+ debug('task {}: {}', self.__task_id, string.format(*args))
+
+ def stable(self):
+ """Return True if running this task will not affect system state.
+
+ Functionally, let f be the task, and s be the system state.
+ Then s' = f(s). If it must be that s' == s (that is, f has
+ reached a fixed point), then this function must return True.
+ """
+ last_summary = self.__db.get_summary(self.__task_id)
+ if last_summary is None:
+ # Task has never run, so running it will modify system
+ # state
+ changed = 'never run'
+ else:
+ # If any of the inputs have changed since the last run of
+ # this task, the result may change, so re-run the task.
+ # Also, it's possible something else changed an output
+ # file, in which case we also want to re-run the task, so
+ # check the outputs, too.
+ changed = self.__summary_changed(last_summary)
+
+ if changed:
+ self.__debug('unstable (changed: {})', changed)
+ return False
+ else:
+ self.__debug('stable')
+ return True
+
+ def __summary_changed(self, summary):
+ """Test if any inputs changed from summary.
+
+ Returns a string describing the changed input, or None.
+ """
+ for dep in summary['deps']:
+ fn, args, val = dep
+ method = getattr(self, '_input_' + fn, None)
+ if method is None:
+ return 'unknown dependency method {}'.format(fn)
+ if method == self._input_unstable or method(*args) != val:
+ return '{}{}'.format(fn, tuple(args))
+ return None
+
+ def _input(self, name, *args):
+ """Register an input for this run.
+
+ This calls self._input_<name>(*args) to get the value of this
+ input. This function should run quickly and return some
+ projection of system state that affects the result of this
+ computation.
+
+ Both args and the return value must be JSON serializable.
+ """
+ method = getattr(self, '_input_' + name)
+ val = method(*args)
+ if [name, args, val] not in self.__deps:
+ self.__deps.append([name, args, val])
+ return val
+
+ def run(self):
+ # Before we run the task, pre-hash any files that were output
+ # files in the last run. These may be input by this run and
+ # then clobbered, at which point it will be too late to get an
+ # input hash. Ideally we would only hash files that were
+ # *both* input and output files, but latex doesn't tell us
+ # about input files that didn't exist, so if we start from a
+ # clean slate, we often require an extra run because we don't
+ # know a file is input/output until after the second run.
+ last_summary = self.__db.get_summary(self.__task_id)
+ if last_summary is not None:
+ for io_filename in last_summary['output_files']:
+ self.__debug('pre-hashing {}', io_filename)
+ hash_cache.get(io_filename)
+
+ # Run the task
+ self.__debug('running')
+ self.__deps = []
+ result = self._execute()
+
+ # Clear cached output file hashes
+ for filename in result.output_filenames:
+ hash_cache.invalidate(filename)
+
+ # If the output files change, then the computation needs to be
+ # re-run, so record them as inputs
+ for filename in result.output_filenames:
+ self._input('file', filename)
+
+ # Update task summary in database
+ self.__db.set_summary(self.__task_id,
+ self.__make_summary(self.__deps, result))
+ del self.__deps
+
+ # Add output files to be cleaned
+ for f in result.output_filenames:
+ self.__db.add_clean(f)
+
+ try:
+ self.__db.commit()
+ except OSError as e:
+ raise TaskError('error committing control database {}: {}'.format(
+ getattr(e, 'filename', '<unknown path>'), e)) from e
+
+ def __make_summary(self, deps, run_result):
+ """Construct a new task summary."""
+ return {
+ 'deps': deps,
+ 'output_files': {f: hash_cache.get(f)
+ for f in run_result.output_filenames},
+ 'extra': run_result.extra,
+ }
+
+ def _execute(self):
+ """Abstract: Execute this task.
+
+ Subclasses should implement this method to execute this task.
+ This method must return a RunResult giving the inputs that
+ were used by the task and the outputs it produced.
+ """
+ raise NotImplementedError('Task._execute is abstract')
+
+ def _get_result_extra(self):
+ """Return the 'extra' result from the previous run, or None."""
+ summary = self.__db.get_summary(self.__task_id)
+ if summary is None:
+ return None
+ return summary['extra']
+
+ def report(self):
+ """Report the task's results to stdout and return exit status.
+
+ This may be called when the task has never executed.
+ Subclasses should override this. The default implementation
+ reports nothing and returns 0.
+ """
+ return 0
+
+ # Standard input functions
+
+ def _input_env(self, var):
+ return os.environ.get(var)
+
+ def _input_file(self, path):
+ return hash_cache.get(path)
+
+ def _input_unstable(self):
+ """Mark this run as unstable, regardless of other inputs."""
+ return None
+
+ def _input_unknown_input(self):
+ """An unknown input that may change after latexrun exits.
+
+ This conservatively marks some unknown input that definitely
+ won't change while latexrun is running, but may change before
+ the user next runs latexrun. This allows the task to
+ stabilize during this invocation, but will cause the task to
+ re-run on the next invocation.
+ """
+ return start_time
+
+class RunResult(collections.namedtuple(
+ 'RunResult', 'output_filenames extra')):
+ """The result of a single task execution.
+
+ This captures all files written by the task, and task-specific
+ results that need to be persisted between runs (for example, to
+ enable reporting of a task's results).
+ """
+ pass
+
+##################################################################
+# LaTeX task
+#
+
+def normalize_input_path(path):
+ # Resolve the directory of the input path, but leave the file
+ # component alone because it affects TeX's behavior.
+ head, tail = os.path.split(path)
+ npath = os.path.join(os.path.realpath(head), tail)
+ return os.path.relpath(path)
+
+class LaTeX(Task):
+ def __init__(self, db, tex_filename, cmd, cmd_args, obj_dir, nowarns):
+ super().__init__(db, 'latex::' + normalize_input_path(tex_filename))
+ self.__tex_filename = tex_filename
+ self.__cmd = cmd
+ self.__cmd_args = cmd_args
+ self.__obj_dir = obj_dir
+ self.__nowarns = nowarns
+
+ self.__pass = 0
+
+ def _input_args(self):
+ # If filename starts with a character the tex command-line
+ # treats specially, then tweak it so it doesn't.
+ filename = self.__tex_filename
+ if filename.startswith(('-', '&', '\\')):
+ filename = './' + filename
+ # XXX Put these at the beginning in case the provided
+ # arguments are malformed. Might want to do a best-effort
+ # check for incompatible user-provided arguments (note:
+ # arguments can be given with one or two dashes and those with
+ # values can use an equals or a space).
+ return [self.__cmd] + self.__cmd_args + \
+ ['-interaction', 'nonstopmode', '-recorder',
+ '-output-directory', self.__obj_dir, filename]
+
+ def _execute(self):
+ # Run latex
+ self.__pass += 1
+ args = self._input('args')
+ debug('running {}', args)
+ try:
+ os.makedirs(self.__obj_dir, exist_ok=True)
+ except OSError as e:
+ raise TaskError('failed to create %s: ' % self.__obj_dir + str(e)) \
+ from e
+ try:
+ verbose_cmd(args)
+ p = subprocess.Popen(args,
+ stdin=subprocess.DEVNULL,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ stdout, has_errors, missing_includes = self.__feed_terminal(p.stdout)
+ status = p.wait()
+ except OSError as e:
+ raise TaskError('failed to execute latex task: ' + str(e)) from e
+
+ # Register environment variable inputs
+ for env_var in ['TEXMFOUTPUT', 'TEXINPUTS', 'TEXFORMATS', 'TEXPOOL',
+ 'TFMFONTS', 'PATH']:
+ self._input('env', env_var)
+
+ jobname, outname = self.__parse_jobname(stdout)
+ inputs, outputs = self.__parse_recorder(jobname)
+
+ # LaTeX overwrites its own inputs. Mark its output files as
+ # clobbered before we hash its input files.
+ for path in outputs:
+ # In some abort cases (e.g., >=100 errors), LaTeX claims
+ # output files that don't actually exist.
+ if os.path.exists(path):
+ hash_cache.clobber(path)
+ # Depend on input files. Task.run pre-hashed outputs from the
+ # previous run, so if this isn't the first run and as long as
+ # the set of outputs didn't change, we'll be able to get the
+ # input hashes, even if they were clobbered.
+ for path in inputs:
+ self._input('file', path)
+
+ if missing_includes:
+ # Missing \includes are tricky. Ideally we'd depend on
+ # the absence of some file, but in fact we'd have to
+ # depend on the failure of a whole kpathsea lookup.
+ # Rather than try to be clever, just mark this as an
+ # unknown input so we'll run at least once on the next
+ # invocation.
+ self._input('unknown_input')
+
+ if not self.__create_outdirs(stdout) and has_errors:
+ # LaTeX reported unrecoverable errors (other than output
+ # directory errors, which we just fixed). We could
+ # continue to stabilize the document, which may change
+ # some of the other problems reported (but not the
+ # unrecoverable errors), or we can just abort now and get
+ # back to the user quickly with the major errors. We opt
+ # for the latter.
+ global terminate_task_loop
+ terminate_task_loop = True
+ # This error could depend on something we failed to track.
+ # It would be really confusing if we continued to report
+ # the error after the user fixed it, so be conservative
+ # and force a re-run next time.
+ self._input('unknown_input')
+
+ return RunResult(outputs,
+ {'jobname': jobname, 'outname': outname,
+ 'status': status})
+
+ def __feed_terminal(self, stdout):
+ prefix = 'latex'
+ if self.__pass > 1:
+ prefix += ' ({})'.format(self.__pass)
+ with Progress(prefix) as progress:
+ buf = []
+ filt = LaTeXFilter()
+ while True:
+ # Use os.read to read only what's available on the pipe,
+ # without waiting to fill a buffer
+ data = os.read(stdout.fileno(), 4096)
+ if not data:
+ break
+ # See "A note about encoding" above
+ data = data.decode('ascii', errors='surrogateescape')
+ buf.append(data)
+ filt.feed(data)
+ file_stack = filt.get_file_stack()
+ if file_stack:
+ tos = file_stack[-1]
+ if tos.startswith('./'):
+ tos = tos[2:]
+ progress.update('>' * len(file_stack) + ' ' + tos)
+ else:
+ progress.update('')
+
+ # Were there unrecoverable errors?
+ has_errors = any(msg.typ == 'error' for msg in filt.get_messages())
+
+ return ''.join(buf), has_errors, filt.has_missing_includes()
+
+ def __parse_jobname(self, stdout):
+ """Extract the job name and output name from latex's output.
+
+ We get these from latex because they depend on complicated
+ file name parsing rules, are affected by arguments like
+ -output-directory, and may be just "texput" if things fail
+ really early. The output name may be None if there were no
+ pages of output.
+ """
+ jobname = outname = None
+ for m in re.finditer(r'^Transcript written on "?(.*)\.log"?\.$', stdout,
+ re.MULTILINE | re.DOTALL):
+ jobname = m.group(1).replace('\n', '')
+ if jobname is None:
+ print(stdout, file=sys.stderr)
+ raise TaskError('failed to extract job name from latex log')
+ for m in re.finditer(r'^Output written on "?(.*\.[^ ."]+)"? \([0-9]+ page',
+ stdout, re.MULTILINE | re.DOTALL):
+ outname = m.group(1).replace('\n', '')
+ if outname is None and not \
+ re.search(r'^No pages of output\.$|^! Emergency stop\.$'
+ r'|^! ==> Fatal error occurred, no output PDF file produced!$',
+ stdout, re.MULTILINE):
+ print(stdout, file=sys.stderr)
+ raise TaskError('failed to extract output name from latex log')
+
+ # LuaTeX (0.76.0) doesn't include the output directory in the
+ # logged transcript or output file name.
+ if os.path.basename(jobname) == jobname and \
+ os.path.exists(os.path.join(self.__obj_dir, jobname + '.log')):
+ jobname = os.path.join(self.__obj_dir, jobname)
+ if outname is not None:
+ outname = os.path.join(self.__obj_dir, outname)
+
+ return jobname, outname
+
+ def __parse_recorder(self, jobname):
+ """Parse file recorder output."""
+ # XXX If latex fails because a file isn't found, that doesn't
+ # go into the .fls file, but creating that file will affect
+ # the computation, so it should be included as an input.
+ # Though it's generally true that files can be added earlier
+ # in search paths and will affect the output without us knowing.
+ #
+ # XXX This is a serious problem for bibtex, since the first
+ # run won't depend on the .bbl file! But maybe the .aux file
+ # will always cause a re-run, at which point the .bbl will
+ # exist?
+ filename = jobname + '.fls'
+ try:
+ recorder = open(filename)
+ except OSError as e:
+ raise TaskError('failed to open file recorder output: ' + str(e)) \
+ from e
+ pwd, inputs, outputs = '', set(), set()
+ for linenum, line in enumerate(recorder):
+ parts = line.rstrip('\n').split(' ', 1)
+ if parts[0] == 'PWD':
+ pwd = parts[1]
+ elif parts[0] in ('INPUT', 'OUTPUT'):
+ if parts[1].startswith('/'):
+ path = parts[1]
+ else:
+ # Try to make "nice" paths, especially for clean
+ path = os.path.relpath(os.path.join(pwd, parts[1]))
+ if parts[0] == 'INPUT':
+ inputs.add(path)
+ else:
+ outputs.add(path)
+ else:
+ raise TaskError('syntax error on line {} of {}'
+ .format(linenum, filename))
+ # Ironically, latex omits the .fls file itself
+ outputs.add(filename)
+ return inputs, outputs
+
+ def __create_outdirs(self, stdout):
+ # In some cases, such as \include'ing a file from a
+ # subdirectory, TeX will attempt to create files in
+ # subdirectories of the output directory that don't exist.
+ # Detect this, create the output directory, and re-run.
+ m = re.search('^! I can\'t write on file `(.*)\'\\.$', stdout, re.M)
+ if m and m.group(1).find('/') > 0 and '../' not in m.group(1):
+ debug('considering creating output sub-directory for {}'.
+ format(m.group(1)))
+ subdir = os.path.dirname(m.group(1))
+ newdir = os.path.join(self.__obj_dir, subdir)
+ if os.path.isdir(subdir) and not os.path.isdir(newdir):
+ debug('creating output subdirectory {}'.format(newdir))
+ try:
+ mkdir_p(newdir)
+ except OSError as e:
+ raise TaskError('failed to create output subdirectory: ' +
+ str(e)) from e
+ self._input('unstable')
+ return True
+
+ def report(self):
+ extra = self._get_result_extra()
+ if extra is None:
+ return 0
+
+ # Parse the log
+ logfile = open(extra['jobname'] + '.log', 'rt', errors='surrogateescape')
+ for msg in self.__clean_messages(
+ LaTeXFilter(self.__nowarns).feed(
+ logfile.read(), True).get_messages()):
+ msg.emit()
+
+ # Return LaTeX's exit status
+ return extra['status']
+
+ def __clean_messages(self, msgs):
+ """Make some standard log messages more user-friendly."""
+ have_undefined_reference = False
+ for msg in msgs:
+ if msg.msg == '==> Fatal error occurred, no output PDF file produced!':
+ msg = msg._replace(typ='info',
+ msg='Fatal error (no output file produced)')
+ if msg.msg.startswith('[LaTeX] '):
+ # Strip unnecessary package name
+ msg = msg._replace(msg=msg.msg.split(' ', 1)[1])
+ if re.match(r'Reference .* undefined', msg.msg):
+ have_undefined_reference = True
+ if have_undefined_reference and \
+ re.match(r'There were undefined references', msg.msg):
+ # LaTeX prints this at the end so the user knows it's
+ # worthwhile looking back at the log. Since latexrun
+ # makes the earlier messages obvious, this is
+ # redundant.
+ continue
+ yield msg
+
+ def get_tex_filename(self):
+ return self.__tex_filename
+
+ def get_jobname(self):
+ extra = self._get_result_extra()
+ if extra is None:
+ return None
+ return extra['jobname']
+
+ def get_outname(self):
+ extra = self._get_result_extra()
+ if extra is None:
+ return None
+ return extra['outname']
+
+ def get_status(self):
+ extra = self._get_result_extra()
+ if extra is None:
+ return None
+ return extra['status']
+
+class LaTeXCommit(Task):
+ def __init__(self, db, latex_task, output_path):
+ super().__init__(db, 'latex_commit::' +
+ normalize_input_path(latex_task.get_tex_filename()))
+ self.__latex_task = latex_task
+ self.__output_path = output_path
+ self.status = 'There were errors'
+
+ def _input_latex(self):
+ return self.__latex_task.get_status(), self.__latex_task.get_outname()
+
+ def _execute(self):
+ self.status = 'There were errors'
+
+ # If latex succeeded with output, atomically commit the output
+ status, outname = self._input('latex')
+ if status != 0 or outname is None:
+ debug('not committing (status {}, outname {})', status, outname)
+ if outname is None:
+ self.status = 'No pages of output'
+ return RunResult([], None)
+
+ commit = self.__output_path or os.path.basename(outname)
+ if os.path.abspath(commit) == os.path.abspath(outname):
+ debug('skipping commit (outname is commit name)')
+ self.status = None
+ return RunResult([], None)
+
+ try:
+ if os.path.exists(commit) and filecmp.cmp(outname, commit):
+ debug('skipping commit ({} and {} are identical)',
+ outname, commit)
+ # To avoid confusion, touch the output file
+ open(outname, 'r+b').close()
+ else:
+ debug('commiting {} to {}', outname, commit)
+ shutil.copy(outname, outname + '~')
+ os.rename(outname + '~', commit)
+ except OSError as e:
+ raise TaskError('error committing latex output: {}'.format(e)) from e
+ self._input('file', outname)
+ self.status = None
+ return RunResult([commit], None)
+
+class LaTeXFilter:
+ TRACE = False # Set to enable detailed parse tracing
+
+ def __init__(self, nowarns=[]):
+ self.__data = ''
+ self.__restart_pos = 0
+ self.__restart_file_stack = []
+ self.__restart_messages_len = 0
+ self.__messages = []
+ self.__first_file = None
+ self.__fatal_error = False
+ self.__missing_includes = False
+ self.__pageno = 1
+ self.__restart_pageno = 1
+
+ self.__suppress = {cls: 0 for cls in nowarns}
+
+ def feed(self, data, eof=False):
+ """Feed LaTeX log data to the parser.
+
+ The log data can be from LaTeX's standard output, or from the
+ log file. If there will be no more data, set eof to True.
+ """
+
+ self.__data += data
+ self.__data_complete = eof
+
+ # Reset to last known-good restart point
+ self.__pos = self.__restart_pos
+ self.__file_stack = self.__restart_file_stack.copy()
+ self.__messages = self.__messages[:self.__restart_messages_len]
+ self.__lstart = self.__lend = -1
+ self.__pageno = self.__restart_pageno
+
+ # Parse forward
+ while self.__pos < len(self.__data):
+ self.__noise()
+
+ # Handle suppressed warnings
+ if eof:
+ msgs = ['%d %s warning%s' % (count, cls, "s" if count > 1 else "")
+ for cls, count in self.__suppress.items() if count]
+ if msgs:
+ self.__message('info', None,
+ '%s not shown (use -Wall to show them)' %
+ ', '.join(msgs), filename=self.__first_file)
+
+ if eof and len(self.__file_stack) and not self.__fatal_error:
+ # Fatal errors generally cause TeX to "succumb" without
+ # closing the file stack, so don't complain in that case.
+ self.__message('warning', None,
+ "unbalanced `(' in log; file names may be wrong")
+ return self
+
+ def get_messages(self):
+ """Return a list of warning and error Messages."""
+ return self.__messages
+
+ def get_file_stack(self):
+ """Return the file stack for the data that has been parsed.
+
+ This results a list from outermost file to innermost file.
+ The list may be empty.
+ """
+
+ return self.__file_stack
+
+ def has_missing_includes(self):
+ """Return True if the log reported missing \\include files."""
+ return self.__missing_includes
+
+ def __save_restart_point(self):
+ """Save the current state as a known-good restart point.
+
+ On the next call to feed, the parser will reset to this point.
+ """
+ self.__restart_pos = self.__pos
+ self.__restart_file_stack = self.__file_stack.copy()
+ self.__restart_messages_len = len(self.__messages)
+ self.__restart_pageno = self.__pageno
+
+ def __message(self, typ, lineno, msg, cls=None, filename=None):
+ if cls is not None and cls in self.__suppress:
+ self.__suppress[cls] += 1
+ return
+ filename = filename or (self.__file_stack[-1] if self.__file_stack
+ else self.__first_file)
+ self.__messages.append(Message(typ, filename, lineno, msg))
+
+ def __ensure_line(self):
+ """Update lstart and lend."""
+ if self.__lstart <= self.__pos < self.__lend:
+ return
+ self.__lstart = self.__data.rfind('\n', 0, self.__pos) + 1
+ self.__lend = self.__data.find('\n', self.__pos) + 1
+ if self.__lend == 0:
+ self.__lend = len(self.__data)
+
+ @property
+ def __col(self):
+ """The 0-based column number of __pos."""
+ self.__ensure_line()
+ return self.__pos - self.__lstart
+
+ @property
+ def __avail(self):
+ return self.__pos < len(self.__data)
+
+ def __lookingat(self, needle):
+ return self.__data.startswith(needle, self.__pos)
+
+ def __lookingatre(self, regexp, flags=0):
+ return re.compile(regexp, flags=flags).match(self.__data, self.__pos)
+
+ def __skip_line(self):
+ self.__ensure_line()
+ self.__pos = self.__lend
+
+ def __consume_line(self, unwrap=False):
+ self.__ensure_line()
+ data = self.__data[self.__pos:self.__lend]
+ self.__pos = self.__lend
+ if unwrap:
+ # TeX helpfully wraps all terminal output at 79 columns
+ # (max_print_line). If requested, unwrap it. There's
+ # simply no way to do this perfectly, since there could be
+ # a line that happens to be 79 columns.
+ #
+ # We check for >=80 because a bug in LuaTeX causes it to
+ # wrap at 80 columns instead of 79 (LuaTeX #900).
+ while self.__lend - self.__lstart >= 80:
+ if self.TRACE: print('<{}> wrapping'.format(self.__pos))
+ self.__ensure_line()
+ data = data[:-1] + self.__data[self.__pos:self.__lend]
+ self.__pos = self.__lend
+ return data
+
+ # Parser productions
+
+ def __noise(self):
+ # Most of TeX's output is line noise that combines error
+ # messages, warnings, file names, user errors and warnings,
+ # and echos of token lists and other input. This attempts to
+ # tease these apart, paying particular attention to all of the
+ # places where TeX echos input so that parens in the input do
+ # not confuse the file name scanner. There are three
+ # functions in TeX that echo input: show_token_list (used by
+ # runaway and show_context, which is used by print_err),
+ # short_display (used by overfull/etc h/vbox), and show_print
+ # (used in issue_message and the same places as
+ # show_token_list).
+ lookingat, lookingatre = self.__lookingat, self.__lookingatre
+ if self.__col == 0:
+ # The following messages are always preceded by a newline
+ if lookingat('! '):
+ return self.__errmessage()
+ if lookingat('!pdfTeX error: '):
+ return self.__pdftex_fail()
+ if lookingat('Runaway '):
+ return self.__runaway()
+ if lookingatre(r'(Overfull|Underfull|Loose|Tight) \\[hv]box \('):
+ return self.__bad_box()
+ if lookingatre('(Package |Class |LaTeX |pdfTeX )?(\w+ )?warning: ', re.I):
+ return self.__generic_warning()
+ if lookingatre('No file .*\\.tex\\.$', re.M):
+ # This happens with \includes of missing files. For
+ # whatever reason, LaTeX doesn't consider this even
+ # worth a warning, but I do!
+ self.__message('warning', None,
+ self.__simplify_message(
+ self.__consume_line(unwrap=True).strip()))
+ self.__missing_includes = True
+ return
+ # Other things that are common and irrelevant
+ if lookingatre(r'(Package|Class|LaTeX) (\w+ )?info: ', re.I):
+ return self.__generic_info()
+ if lookingatre(r'(Document Class|File|Package): '):
+ # Output from "\ProvidesX"
+ return self.__consume_line(unwrap=True)
+ if lookingatre(r'\\\w+=\\[a-z]+\d+\n'):
+ # Output from "\new{count,dimen,skip,...}"
+ return self.__consume_line(unwrap=True)
+
+ # print(self.__data[self.__lstart:self.__lend].rstrip())
+ # self.__pos = self.__lend
+ # return
+
+ # Now that we've substantially reduced the spew and hopefully
+ # eliminated all input echoing, we're left with the file name
+ # stack, page outs, and random other messages from both TeX
+ # and various packages. We'll assume at this point that all
+ # parentheses belong to the file name stack or, if they're in
+ # random other messages, they're at least balanced and nothing
+ # interesting happens between them. For page outs, ship_out
+ # prints a space if not at the beginning of a line, then a
+ # "[", then the page number being shipped out (this is
+ # usually, but not always, followed by "]").
+ m = re.compile(r'[(){}\n]|(?<=[\n ])\[\d+', re.M).\
+ search(self.__data, self.__pos)
+ if m is None:
+ self.__pos = len(self.__data)
+ return
+ self.__pos = m.start() + 1
+ ch = self.__data[m.start()]
+ if ch == '\n':
+ # Save this as a known-good restart point for incremental
+ # parsing, since we definitely didn't match any of the
+ # known message types above.
+ self.__save_restart_point()
+ elif ch == '[':
+ # This is printed at the end of a page, so we're beginning
+ # page n+1.
+ self.__pageno = int(self.__lookingatre(r'\d+').group(0)) + 1
+ elif ((self.__data.startswith('`', m.start() - 1) or
+ self.__data.startswith('`\\', m.start() - 2)) and
+ self.__data.startswith('\'', m.start() + 1)):
+ # (, ), {, and } sometimes appear in TeX's error
+ # descriptions, but they're always in `'s (and sometimes
+ # backslashed)
+ return
+ elif ch == '(':
+ # XXX Check that the stack doesn't drop to empty and then re-grow
+ first = self.__first_file is None and self.__col == 1
+ filename = self.__filename()
+ self.__file_stack.append(filename)
+ if first:
+ self.__first_file = filename
+ if self.TRACE:
+ print('<{}>{}enter {}'.format(
+ m.start(), ' '*len(self.__file_stack), filename))
+ elif ch == ')':
+ if len(self.__file_stack):
+ if self.TRACE:
+ print('<{}>{}exit {}'.format(
+ m.start(), ' '*len(self.__file_stack),
+ self.__file_stack[-1]))
+ self.__file_stack.pop()
+ else:
+ self.__message('warning', None,
+ "extra `)' in log; file names may be wrong ")
+ elif ch == '{':
+ # TeX uses this for various things we want to ignore, like
+ # file names and print_mark. Consume up to the '}'
+ epos = self.__data.find('}', self.__pos)
+ if epos != -1:
+ self.__pos = epos + 1
+ else:
+ self.__message('warning', None,
+ "unbalanced `{' in log; file names may be wrong")
+ elif ch == '}':
+ self.__message('warning', None,
+ "extra `}' in log; file names may be wrong")
+
+ def __filename(self):
+ initcol = self.__col
+ first = True
+ name = ''
+ # File names may wrap, but if they do, TeX will always print a
+ # newline before the open paren
+ while first or (initcol == 1 and self.__lookingat('\n')
+ and self.__col >= 79):
+ if not first:
+ self.__pos += 1
+ m = self.__lookingatre(r'[^(){} \n]*')
+ name += m.group()
+ self.__pos = m.end()
+ first = False
+ return name
+
+ def __simplify_message(self, msg):
+ msg = re.sub(r'^(?:Package |Class |LaTeX |pdfTeX )?([^ ]+) (?:Error|Warning): ',
+ r'[\1] ', msg, flags=re.I)
+ msg = re.sub(r'\.$', '', msg)
+ msg = re.sub(r'has occurred (while \\output is active)', r'\1', msg)
+ return msg
+
+ def __errmessage(self):
+ # Procedure print_err (including \errmessage, itself used by
+ # LaTeX's \GenericError and all of its callers), as well as
+ # fatal_error. Prints "\n! " followed by error text
+ # ("Emergency stop" in the case of fatal_error). print_err is
+ # always followed by a call to error, which prints a period,
+ # and a newline...
+ msg = self.__consume_line(unwrap=True)[1:].strip()
+ is_fatal_error = (msg == 'Emergency stop.')
+ msg = self.__simplify_message(msg)
+ # ... and then calls show_context, which prints the input
+ # stack as pairs of lines giving the context. These context
+ # lines are truncated so they never wrap. Each pair of lines
+ # will start with either "<something> " if the context is a
+ # token list, "<*> " for terminal input (or command line),
+ # "<read ...>" for stream reads, something like "\macroname
+ # #1->" for macros (though everything after \macroname is
+ # subject to being elided as "..."), or "l.[0-9]+ " if it's a
+ # file. This is followed by the errant input with a line
+ # break where the error occurred.
+ lineno = None
+ found_context = False
+ stack = []
+ while self.__avail:
+ m1 = self.__lookingatre(r'<([a-z ]+|\*|read [^ >]*)> |\\.*(->|...)')
+ m2 = self.__lookingatre('l\.[0-9]+ ')
+ if m1:
+ found_context = True
+ pre = self.__consume_line().rstrip('\n')
+ stack.append(pre)
+ elif m2:
+ found_context = True
+ pre = self.__consume_line().rstrip('\n')
+ info, rest = pre.split(' ', 1)
+ lineno = int(info[2:])
+ stack.append(rest)
+ elif found_context:
+ # Done with context
+ break
+ if found_context:
+ # Consume the second context line
+ post = self.__consume_line().rstrip('\n')
+ # Clean up goofy trailing ^^M TeX sometimes includes
+ post = re.sub(r'\^\^M$', '', post)
+ if post[:len(pre)].isspace() and not post.isspace():
+ stack.append(len(stack[-1]))
+ stack[-2] += post[len(pre):]
+ else:
+ # If we haven't found the context, skip the line.
+ self.__skip_line()
+ stack_msg = ''
+ for i, trace in enumerate(stack):
+ stack_msg += ('\n ' + (' ' * trace) + '^'
+ if isinstance(trace, int) else
+ '\n at ' + trace.rstrip() if i == 0 else
+ '\n from ' + trace.rstrip())
+
+ if is_fatal_error:
+ # fatal_error always prints one additional line of message
+ info = self.__consume_line().strip()
+ if info.startswith('*** '):
+ info = info[4:]
+ msg += ': ' + info.lstrip('(').rstrip(')')
+
+ self.__message('error', lineno, msg + stack_msg)
+ self.__fatal_error = True
+
+ def __pdftex_fail(self):
+ # Procedure pdftex_fail. Prints "\n!pdfTeX error: ", the
+ # message, and a newline. Unlike print_err, there's never
+ # context.
+ msg = self.__consume_line(unwrap=True)[1:].strip()
+ msg = self.__simplify_message(msg)
+ self.__message('error', None, msg)
+
+ def __runaway(self):
+ # Procedure runaway. Prints "\nRunaway ...\n" possibly
+ # followed by token list (user text). Always followed by a
+ # call to print_err, so skip lines until we see the print_err.
+ self.__skip_line() # Skip "Runaway ...\n"
+ if not self.__lookingat('! ') and self.__avail:
+ # Skip token list, which is limited to one line
+ self.__skip_line()
+
+ def __bad_box(self):
+ # Function hpack and vpack. hpack prints a warning, a
+ # newline, then a short_display of the offending text.
+ # Unfortunately, there's nothing indicating the end of the
+ # offending text, but it should be on one (possible wrapped)
+ # line. vpack prints a warning and then, *unless output is
+ # active*, a newline. The missing newline is probably a bug,
+ # but it sure makes our lives harder.
+ origpos = self.__pos
+ msg = self.__consume_line()
+ m = re.search(r' in (?:paragraph|alignment) at lines ([0-9]+)--([0-9]+)', msg) or \
+ re.search(r' detected at line ([0-9]+)', msg)
+ if m:
+ # Sometimes TeX prints crazy line ranges like "at lines
+ # 8500--250". The lower number seems roughly sane, so use
+ # that. I'm not sure what causes this, but it may be
+ # related to shipout routines messing up line registers.
+ lineno = min(int(m.group(1)), int(m.groups()[-1]))
+ msg = msg[:m.start()]
+ else:
+ m = re.search(r' while \\output is active', msg)
+ if m:
+ lineno = None
+ msg = msg[:m.end()]
+ else:
+ self.__message('warning', None,
+ 'malformed bad box message in log')
+ return
+ # Back up to the end of the known message text
+ self.__pos = origpos + m.end()
+ if self.__lookingat('\n'):
+ # We have a newline, so consume it and look for the
+ # offending text.
+ self.__pos += 1
+ # If there is offending text, it will start with a font
+ # name, which will start with a \.
+ if 'hbox' in msg and self.__lookingat('\\'):
+ self.__consume_line(unwrap=True)
+ msg = self.__simplify_message(msg) + ' (page {})'.format(self.__pageno)
+ cls = msg.split(None, 1)[0].lower()
+ self.__message('warning', lineno, msg, cls=cls)
+
+ def __generic_warning(self):
+ # Warnings produced by LaTeX's \GenericWarning (which is
+ # called by \{Package,Class}Warning and \@latex@warning),
+ # warnings produced by pdftex_warn, and other random warnings.
+ msg, cls = self.__generic_info()
+ # Most warnings include an input line emitted by \on@line
+ m = re.search(' on input line ([0-9]+)', msg)
+ if m:
+ lineno = int(m.group(1))
+ msg = msg[:m.start()]
+ else:
+ lineno = None
+ msg = self.__simplify_message(msg)
+ self.__message('warning', lineno, msg, cls=cls)
+
+ def __generic_info(self):
+ # Messages produced by LaTeX's \Generic{Error,Warning,Info}
+ # and things that look like them
+ msg = self.__consume_line(unwrap=True).strip()
+ # Package and class messages are continued with lines
+ # containing '(package name) '
+ pkg_name = msg.split(' ', 2)[1]
+ prefix = '(' + pkg_name + ') '
+ while self.__lookingat(prefix):
+ # Collect extra lines. It's important that we keep these
+ # because they may contain context information like line
+ # numbers.
+ extra = self.__consume_line(unwrap=True)
+ msg += ' ' + extra[len(prefix):].strip()
+ return msg, pkg_name.lower()
+
+##################################################################
+# BibTeX task
+#
+
+class BibTeX(Task):
+ def __init__(self, db, latex_task, cmd, cmd_args, nowarns, obj_dir):
+ super().__init__(db, 'bibtex::' + normalize_input_path(
+ latex_task.get_tex_filename()))
+ self.__latex_task = latex_task
+ self.__cmd = cmd
+ self.__cmd_args = cmd_args
+ self.__obj_dir = obj_dir
+
+ def stable(self):
+ # If bibtex doesn't have its inputs, then it's stable because
+ # it has no effect on system state.
+ jobname = self.__latex_task.get_jobname()
+ if jobname is None:
+ # We don't know where the .aux file is until latex has run
+ return True
+ if not os.path.exists(jobname + '.aux'):
+ # Input isn't ready, so bibtex will simply fail without
+ # affecting system state. Hence, this task is trivially
+ # stable.
+ return True
+ if not self.__find_bib_cmds(os.path.dirname(jobname), jobname + '.aux'):
+ # The tex file doesn't refer to any bibliographic data, so
+ # don't run bibtex.
+ return True
+
+ return super().stable()
+
+ def __find_bib_cmds(self, basedir, auxname, stack=()):
+ debug('scanning for bib commands in {}'.format(auxname))
+ if auxname in stack:
+ raise TaskError('.aux file loop')
+ stack = stack + (auxname,)
+
+ try:
+ aux_data = open(auxname, errors='surrogateescape').read()
+ except FileNotFoundError:
+ # The aux file may not exist if latex aborted
+ return False
+ if re.search(r'^\\bibstyle\{', aux_data, flags=re.M) or \
+ re.search(r'^\\bibdata\{', aux_data, flags=re.M):
+ return True
+
+ if re.search(r'^\\abx@aux@cite\{', aux_data, flags=re.M):
+ # biber citation
+ return True
+
+ # Recurse into included aux files (see aux_input_command), in
+ # case \bibliography appears in an \included file.
+ for m in re.finditer(r'^\\@input\{([^}]*)\}', aux_data, flags=re.M):
+ if self.__find_bib_cmds(basedir, os.path.join(basedir, m.group(1)),
+ stack):
+ return True
+
+ return False
+
+ def _input_args(self):
+ if self.__is_biber():
+ aux_name = os.path.basename(self.__latex_task.get_jobname())
+ else:
+ aux_name = os.path.basename(self.__latex_task.get_jobname()) + '.aux'
+ return [self.__cmd] + self.__cmd_args + [aux_name]
+
+ def _input_cwd(self):
+ return os.path.dirname(self.__latex_task.get_jobname())
+
+ def _input_auxfile(self, auxname):
+ # We don't consider the .aux files regular inputs.
+ # Instead, we extract just the bit that BibTeX cares about
+ # and depend on that. See get_aux_command_and_process in
+ # bibtex.web.
+ debug('hashing filtered aux file {}', auxname)
+ try:
+ with open(auxname, 'rb') as aux:
+ h = hashlib.sha256()
+ for line in aux:
+ if line.startswith((b'\\citation{', b'\\bibdata{',
+ b'\\bibstyle{', b'\\@input{',
+ b'\\abx@aux@cite{')):
+ h.update(line)
+ return h.hexdigest()
+ except FileNotFoundError:
+ debug('{} does not exist', auxname)
+ return None
+
+ def __path_join(self, first, rest):
+ if rest is None:
+ # Append ':' to keep the default search path
+ return first + ':'
+ return first + ':' + rest
+
+ def __is_biber(self):
+ return "biber" in self.__cmd
+
+ def _execute(self):
+ # This gets complicated when \include is involved. \include
+ # switches to a different aux file and records its path in the
+ # main aux file. However, BibTeX does not consider this path
+ # to be relative to the location of the main aux file, so we
+ # have to run BibTeX *in the output directory* for it to
+ # follow these includes (there's no way to tell BibTeX other
+ # locations to search). Unfortunately, this means BibTeX will
+ # no longer be able to find local bib or bst files, but so we
+ # tell it where to look by setting BIBINPUTS and BSTINPUTS
+ # (luckily we can control this search). We have to pass this
+ # same environment down to Kpathsea when we resolve the paths
+ # in BibTeX's log.
+ args, cwd = self._input('args'), self._input('cwd')
+ debug('running {} in {}', args, cwd)
+
+ env = os.environ.copy()
+ env['BIBINPUTS'] = self.__path_join(os.getcwd(), env.get('BIBINPUTS'))
+ env['BSTINPUTS'] = self.__path_join(os.getcwd(), env.get('BSTINPUTS'))
+
+ try:
+ verbose_cmd(args, cwd, env)
+ p = subprocess.Popen(args, cwd=cwd, env=env,
+ stdin=subprocess.DEVNULL,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ stdout = self.__feed_terminal(p.stdout)
+ status = p.wait()
+ except OSError as e:
+ raise TaskError('failed to execute bibtex task: ' + str(e)) from e
+
+ inputs, auxnames, outbase = self.__parse_inputs(stdout, cwd, env)
+ if not inputs and not auxnames:
+ # BibTeX failed catastrophically.
+ print(stdout, file=sys.stderr)
+ raise TaskError('failed to execute bibtex task')
+
+ # Register environment variable inputs
+ for env_var in ['TEXMFOUTPUT', 'BSTINPUTS', 'BIBINPUTS', 'PATH']:
+ self._input('env', env_var)
+
+ # Register file inputs
+ for path in auxnames:
+ self._input('auxfile', path)
+ for path in inputs:
+ self._input('file', path)
+
+ if self.__is_biber():
+ outbase = os.path.join(cwd, outbase)
+ outputs = [outbase + '.bbl', outbase + '.blg']
+ return RunResult(outputs, {'outbase': outbase, 'status': status,
+ 'inputs': inputs})
+
+ def __feed_terminal(self, stdout):
+ with Progress('bibtex') as progress:
+ buf, linebuf = [], ''
+ while True:
+ data = os.read(stdout.fileno(), 4096)
+ if not data:
+ break
+ # See "A note about encoding" above
+ data = data.decode('ascii', errors='surrogateescape')
+ buf.append(data)
+ linebuf += data
+ while '\n' in linebuf:
+ line, _, linebuf = linebuf.partition('\n')
+ if line.startswith('Database file'):
+ progress.update(line.split(': ', 1)[1])
+ return ''.join(buf)
+
+ def __parse_inputs(self, log, cwd, env):
+ # BibTeX conveniently logs every file that it opens, and its
+ # log is actually sensible (see calls to a_open_in in
+ # bibtex.web.) The only trick is that these file names are
+ # pre-kpathsea lookup and may be relative to the directory we
+ # ran BibTeX in.
+ #
+ # Because BibTeX actually depends on very little in the .aux
+ # file (and it's likely other things will change in the .aux
+ # file), we don't count the whole .aux file as an input, but
+ # instead depend only on the lines that matter to BibTeX.
+ kpathsea = Kpathsea('bibtex')
+ inputs = []
+ auxnames = []
+ outbase = None
+ for line in log.splitlines():
+ m = re.match('(?:The top-level auxiliary file:'
+ '|A level-[0-9]+ auxiliary file:) (.*)', line)
+ if m:
+ auxnames.append(os.path.join(cwd, m.group(1)))
+ continue
+ m = re.match('(?:(The style file:)|(Database file #[0-9]+:)) (.*)',
+ line)
+ if m:
+ filename = m.group(3)
+ if m.group(1):
+ filename = kpathsea.find_file(filename, 'bst', cwd, env)
+ elif m.group(2):
+ filename = kpathsea.find_file(filename, 'bib', cwd, env)
+
+ # If this path is relative to the source directory,
+ # clean it up for error reporting and portability of
+ # the dependency DB
+ if filename.startswith('/'):
+ relname = os.path.relpath(filename)
+ if '../' not in relname:
+ filename = relname
+
+ inputs.append(filename)
+
+ # biber output
+ m = re.search("Found BibTeX data source '(.*?)'",
+ line)
+ if m:
+ filename = m.group(1)
+ inputs.append(filename)
+
+ m = re.search("Logfile is '(.*?)'", line)
+ if m:
+ outbase = m.group(1)[:-4]
+
+ if outbase is None:
+ outbase = auxnames[0][:-4]
+
+ return inputs, auxnames, outbase
+
+ def report(self):
+ extra = self._get_result_extra()
+ if extra is None:
+ return 0
+
+ # Parse and pretty-print the log
+ log = open(extra['outbase'] + '.blg', 'rt').read()
+ inputs = extra['inputs']
+ for msg in BibTeXFilter(log, inputs).get_messages():
+ msg.emit()
+
+ # BibTeX exits with 1 if there are warnings, 2 if there are
+ # errors, and 3 if there are fatal errors (sysdep.h).
+ # Translate to a normal UNIX exit status.
+ if extra['status'] >= 2:
+ return 1
+ return 0
+
+class BibTeXFilter:
+ def __init__(self, data, inputs):
+ self.__inputs = inputs
+ self.__key_locs = None
+
+ self.__messages = []
+
+ prev_line = ''
+ for line in data.splitlines():
+ msg = self.__process_line(prev_line, line)
+ if msg is not None:
+ self.__messages.append(Message(*msg))
+ prev_line = line
+
+ def get_messages(self):
+ """Return a list of warning and error Messages."""
+ # BibTeX reports most errors in no particular order. Sort by
+ # file and line.
+ return sorted(self.__messages,
+ key=lambda msg: (msg.filename or '', msg.lineno or 0))
+
+ def __process_line(self, prev_line, line):
+ m = None
+ def match(regexp):
+ nonlocal m
+ m = re.match(regexp, line)
+ return m
+
+ # BibTeX has many error paths, but luckily the set is closed,
+ # so we can find all of them. This first case is the
+ # workhorse format.
+ #
+ # AUX errors: aux_err/aux_err_return/aux_err_print
+ #
+ # BST errors: bst_ln_num_print/bst_err/
+ # bst_err_print_and_look_for_blank_line_return/
+ # bst_warn_print/bst_warn/
+ # skip_token/skip_token_print/
+ # bst_ext_warn/bst_ext_warn_print/
+ # bst_ex_warn/bst_ex_warn_print/
+ # bst_mild_ex_warn/bst_mild_ex_warn_print/
+ # bst_string_size_exceeded
+ #
+ # BIB errors: bib_ln_num_print/
+ # bib_err_print/bib_err/
+ # bib_warn_print/bib_warn/
+ # bib_one_of_two_expected_err/macro_name_warning/
+ if match('(.*?)---?line ([0-9]+) of file (.*)'):
+ # Sometimes the real error is printed on the previous line
+ if m.group(1) == 'while executing':
+ # bst_ex_warn. The real message is on the previous line
+ text = prev_line
+ else:
+ text = m.group(1) or prev_line
+ typ, msg = self.__canonicalize(text)
+ return (typ, m.group(3), int(m.group(2)), msg)
+
+ # overflow/print_overflow
+ if match('Sorry---you\'ve exceeded BibTeX\'s (.*)'):
+ return ('error', None, None, 'capacity exceeded: ' + m.group(1))
+ # confusion/print_confusion
+ if match('(.*)---this can\'t happen$'):
+ return ('error', None, None, 'internal error: ' + m.group(1))
+ # aux_end_err
+ if match('I found (no .*)---while reading file (.*)'):
+ return ('error', m.group(2), None, m.group(1))
+ # bad_cross_reference_print/
+ # nonexistent_cross_reference_error/
+ # @<Complain about a nested cross reference@>
+ #
+ # This is split across two lines. Match the second.
+ if match('^refers to entry "'):
+ typ, msg = self.__canonicalize(prev_line + ' ' + line)
+ msg = re.sub('^a (bad cross reference)', '\\1', msg)
+ # Try to give this key a location
+ filename = lineno = None
+ m2 = re.search(r'--entry "[^"]"', prev_line)
+ if m2:
+ filename, lineno = self.__find_key(m2.group(1))
+ return (typ, filename, lineno, msg)
+ # print_missing_entry
+ if match('Warning--I didn\'t find a database entry for (".*")'):
+ return ('warning', None, None,
+ 'no database entry for ' + m.group(1))
+ # x_warning
+ if match('Warning--(.*)'):
+ # Most formats give warnings about "something in <key>".
+ # Try to match it up.
+ filename = lineno = None
+ for m2 in reversed(list(re.finditer(r' in ([^, \t\n]+)\b', line))):
+ if m2:
+ filename, lineno = self.__find_key(m2.group(1))
+ if filename:
+ break
+ return ('warning', filename, lineno, m.group(1))
+ # @<Clean up and leave@>
+ if match('Aborted at line ([0-9]+) of file (.*)'):
+ return ('info', m.group(2), int(m.group(1)), 'aborted')
+
+ # biber type errors
+ if match('^.*> WARN - (.*)$'):
+ print ('warning', None, None, m.group(1))
+ m2 = re.match("(.*) in file '(.*?)', skipping ...", m.group(1))
+ if m2:
+ return ('warning', m2.group(2), "0", m2.group(1))
+ return ('warning', None, None, m.group(1))
+
+ if match('^.*> ERROR - (.*)$'):
+ m2 = re.match("BibTeX subsystem: (.*?), line (\d+), (.*)$", m.group(1))
+ if m2:
+ return ('error', m2.group(1), m2.group(2), m2.group(3))
+ return ('error', None, None, m.group(1))
+
+
+ def __canonicalize(self, msg):
+ if msg.startswith('Warning'):
+ msg = re.sub('^Warning-*', '', msg)
+ typ = 'warning'
+ else:
+ typ = 'error'
+ msg = re.sub('^I(\'m| was)? ', '', msg)
+ msg = msg[:1].lower() + msg[1:]
+ return typ, msg
+
+ def __find_key(self, key):
+ if self.__key_locs is None:
+ p = BibTeXKeyParser()
+ self.__key_locs = {}
+ for filename in self.__inputs:
+ data = open(filename, 'rt', errors='surrogateescape').read()
+ for pkey, lineno in p.parse(data):
+ self.__key_locs.setdefault(pkey, (filename, lineno))
+ return self.__key_locs.get(key, (None, None))
+
+class BibTeXKeyParser:
+ """Just enough of a BibTeX parser to find keys."""
+
+ def parse(self, data):
+ IDENT_RE = '(?![0-9])([^\x00-\x20\x80-\xff \t"#%\'(),={}]+)'
+ self.__pos, self.__data = 0, data
+ # Find the next entry
+ while self.__consume('[^@]*@[ \t\n]*'):
+ # What type of entry?
+ if not self.__consume(IDENT_RE + '[ \t\n]*'):
+ continue
+ typ = self.__m.group(1)
+ if typ == 'comment':
+ continue
+ start = self.__pos
+ if not self.__consume('([{(])[ \t\n]*'):
+ continue
+ closing, key_re = {'{' : ('}', '([^, \t\n}]*)'),
+ '(' : (')', '([^, \t\n]*)')}[self.__m.group(1)]
+ if typ not in ('preamble', 'string'):
+ # Regular entry; get key
+ if self.__consume(key_re):
+ yield self.__m.group(1), self.__lineno()
+ # Consume body of entry
+ self.__pos = start
+ self.__balanced(closing)
+
+ def __consume(self, regexp):
+ self.__m = re.compile(regexp).match(self.__data, self.__pos)
+ if self.__m:
+ self.__pos = self.__m.end()
+ return self.__m
+
+ def __lineno(self):
+ return self.__data.count('\n', 0, self.__pos) + 1
+
+ def __balanced(self, closing):
+ self.__pos += 1
+ level = 0
+ skip = re.compile('[{}' + closing + ']')
+ while True:
+ m = skip.search(self.__data, self.__pos)
+ if not m:
+ break
+ self.__pos = m.end()
+ ch = m.group(0)
+ if level == 0 and ch == closing:
+ break
+ elif ch == '{':
+ level += 1
+ elif ch == '}':
+ level -= 1
+
+class Kpathsea:
+ def __init__(self, program_name):
+ self.__progname = program_name
+
+ def find_file(self, name, format, cwd=None, env=None):
+ """Return the resolved path of 'name' or None."""
+
+ args = ['kpsewhich', '-progname', self.__progname, '-format', format,
+ name]
+ try:
+ verbose_cmd(args, cwd, env)
+ path = subprocess.check_output(
+ args, cwd=cwd, env=env, universal_newlines=True).strip()
+ except subprocess.CalledProcessError as e:
+ if e.returncode != 1:
+ raise
+ return None
+ if cwd is None:
+ return path
+ return os.path.join(cwd, path)
+
+if __name__ == "__main__":
+ main()