2 files changed, 83 insertions, 0 deletions
diff --git a/README.md b/README.md
index 73a9fe1..6df1cbd 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,46 @@ To build the sample `select-from.ebnf`, type something like:
 
     cat samples/select-from.ebnf | ./ebnf.sno | pic | groff -Tps >select-from.ps
 
+## HIGHLIGHT (Python)
+
+`pygments-groff.py` is a syntax highlighting preprocessor based on [Pygments](https://pygments.org/) and
+consequently written in Python 3.
+It is the most powerful (and probably the fastest) of the syntax highlighting preprocessors presented here.
+It should also be more portable as it does not rely on stdout redirection magic.
+It should work with all Groff macro suites and even preserves the line numbering
+in Groff error messages.
+
+You can process embedded blocks of code as in the following ms-based example:
+
+```groff
+.LD
+.CW
+.lg 0
+.HIGHLIGHT c
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+	printf("Hello world!\n");
+	return 0;
+}
+.HIGHLIGHT
+.DE
+```
+
+Note that you may have to do more before `.HIGHLIGHT` - for instance redefine chars -
+depending on your use case.
+
+The `default` language identifier is useful to include code without highlighting,
+but still benefit from Pygments' preprocessing in order to achieve verbatim text.
+A list of language identifiers (short names) can be found on the [Pygments website](https://pygments.org/languages/).
+
+Just as with `highlight.lua`, you can specify a file name directly after the language identifier:
+
+```groff
+.HIGHLIGHT c hello.c
+```
+
 ## HIGHLIGHT (SNOBOL4)
 
 `highlight.sno` is a small preprocessor written in [CSNOBOL4](http://www.snobol4.org/csnobol4/)
diff --git a/pygments-groff.py b/pygments-groff.py
new file mode 100755
index 0000000..49637eb
--- /dev/null
+++ b/pygments-groff.py
@@ -0,0 +1,43 @@
+#!/usr/local/bin/python3.9
+from pygments.lexers import get_lexer_by_name, RawTokenLexer
+from pygments.formatters import GroffFormatter
+from pygments import highlight
+import re
+from sys import stdin, stdout, stderr
+
+# Groff preprocessor: copies stdin to stdout, replacing .HIGHLIGHT blocks with Pygments-formatted code.
+formatter = GroffFormatter(style="sas")
+start_pattern = re.compile(r"\. *HIGHLIGHT +([^ ]+)( +(.*))?\n")  # ".HIGHLIGHT <lang> [<file>]" opens a block
+end_pattern = re.compile(r"\. *HIGHLIGHT *\n")  # a bare ".HIGHLIGHT" closes an embedded block
+
+while True:
+    for line in stdin:
+        stdout.write(line)  # everything outside a block, including the opening request, is passed through
+        params = start_pattern.match(line)
+        if params:
+            break
+    else:  # EOF: stop here instead of reusing a stale (or undefined) match from an earlier pass
+        break
+
+    lang, filename = params.group(1, 3)
+    lexer = RawTokenLexer() if lang == "default" else get_lexer_by_name(lang)
+    # NOTE: This option is broken and will result in a bogus empty line with the GroffFormatter
+    lexer.ensurenl = False
+
+    contents = []
+    if filename:
+        with open(filename) as source:  # close the file promptly instead of leaking the handle
+            contents.append(source.read())
+        stdout.write(".ds HIGHLIGHT-LF \\n[.c] \\n[.F]\n")  # remember the current line number and file name
+        stdout.write(".lf 1 "+filename+"\n")  # attribute the following lines to the included file
+    else:
+        for line in stdin:  # collect the embedded code up to the closing request, which is echoed
+            if end_pattern.match(line):
+                stdout.write(line)
+                break
+            contents.append(line)
+
+    formatted = highlight("".join(contents), lexer, formatter)
+    stdout.write(formatted.replace("\n\n", "\n\\&\n")+"\n")  # empty output lines become "\&" lines
+    if filename:
+        stdout.write(".lf \\*[HIGHLIGHT-LF]\n")  # restore the saved line number and file name