#!/usr/bin/env python3 import os import sys import textwrap self_path = os.path.dirname(os.path.realpath(__file__)); f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r") codepoint_list = [] category_list = [ "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" ] # Filter codepoints falling in the right category: for line in f: comment_off = line.find("#") if comment_off >= 0: line = line[:comment_off] line = line.strip() if not line: continue char_range, category = line.split(";") char_range = char_range.strip() category = category.strip() if not category in category_list: continue delim_off = char_range.find("..") if delim_off >= 0: codepoint0 = int(char_range[:delim_off], 16) codepoint1 = int(char_range[delim_off+2:], 16) for codepoint in range(codepoint0, codepoint1 + 1): codepoint_list.append(codepoint) else: codepoint = int(char_range, 16) codepoint_list.append(codepoint) f.close() codepoint_list.sort() index0 = 0 count = len(codepoint_list) records = list() while index0 < count: index1 = index0 + 1 while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1: index1 += 1 if index1 - index0 > 1: # Range of codepoints records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1])) else: # Single codepoint records.append("S(0x{:04x})".format(codepoint_list[index0])) index0 = index1 sys.stdout.write("static const unsigned PUNCT_MAP[] = {\n") sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110, initial_indent = " ", subsequent_indent=" "))) sys.stdout.write("\n};\n\n")