unicode-ccc 0.1.2

Unicode Canonical Combining Class detection
Documentation
#!/usr/bin/env python3

import urllib.request
import os

URL = 'https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedCombiningClass.txt'
FILE_NAME = 'DerivedCombiningClass.txt'

if not os.path.exists(FILE_NAME):
    urllib.request.urlretrieve(URL, FILE_NAME)

classes = dict()
last_class = ''
ranges = []
with open(FILE_NAME) as f:
    for line in f:
        line = line.strip()

        if not line:
            continue

        if line.startswith('# Canonical_Combining_Class='):
            class_name = line[28:].replace('_', '')
            if last_class:
                classes[last_class] = ranges
                ranges = []

            last_class = class_name

        if line.startswith('#'):
            continue

        range, _ = line.split(';')
        range = range.strip()

        if '..' in range:
            start, end = range.split('..')
            ranges.append([start, end])
        else:
            ranges.append([range, None])
    classes[last_class] = ranges

for class_name, ranges in classes.items():
    if class_name == 'NotReordered':
        continue

    for range in ranges:
        if range[1]:
            print('0x{}..=0x{} => {},'.format(range[0], range[1], class_name))
        else:
            print('0x{} => {},'.format(range[0], class_name))