From a2c6733805465aaf113b26c45609166c2254903c Mon Sep 17 00:00:00 2001
From: Neil <nyamatongwe@gmail.com>
Date: Mon, 1 Jul 2013 18:45:48 +1000
Subject: Added CharacterCategory.

---
 scripts/GenerateCharacterCategory.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 scripts/GenerateCharacterCategory.py

(limited to 'scripts/GenerateCharacterCategory.py')

diff --git a/scripts/GenerateCharacterCategory.py b/scripts/GenerateCharacterCategory.py
new file mode 100644
index 000000000..6e3ffd3ba
--- /dev/null
+++ b/scripts/GenerateCharacterCategory.py
@@ -0,0 +1,35 @@
+# Script to generate CharacterCategory.cxx from Python's Unicode data
+# Only needs to be run rarely: when a Python with a new version of Unicode data is available.
+# Should not be run with old versions of Python.
+
+import codecs, os, platform, sys, unicodedata
+
+from FileGenerator import Regenerate
+
+def findCategories(filename):
+    with codecs.open(filename, "r", "UTF-8") as infile:
+        lines = [x.strip() for x in infile.readlines() if "\tcc" in x]
+    values = "".join(lines).replace(" ","").split(",")
+    print(values)
+    return [v[2:] for v in values]
+
+def updateCharacterCategory(filename):
+    values = ["// Created with Python %s,  Unicode %s" % (
+        platform.python_version(), unicodedata.unidata_version)]
+    category = unicodedata.category(chr(0))
+    startRange = 0
+    for ch in range(sys.maxunicode):
+        uch = chr(ch)
+        if unicodedata.category(uch) != category:
+            value = startRange * 32 + categories.index(category)
+            values.append("%d," % value)
+            category = unicodedata.category(uch)
+            startRange = ch
+    value = startRange * 32 + categories.index(category)
+    values.append("%d," % value)
+
+    Regenerate(filename, "//", values)
+
+categories = findCategories("../lexlib/CharacterCategory.h")  # global read by updateCharacterCategory
+# Both paths are relative (presumably to the scripts directory), so the working directory matters.
+updateCharacterCategory("../lexlib/CharacterCategory.cxx")
-- 
cgit v1.2.3