-
Notifications
You must be signed in to change notification settings - Fork 182
/
utils.py
96 lines (81 loc) · 2.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import sys, os, warnings
# Major version of the running interpreter (2 or 3); selects the shims below.
PYTHON_VERSION_MAJOR = sys.version_info[0]

if PYTHON_VERSION_MAJOR >= 3:
    from io import open

    # Python 3 strings are already unicode, so unicode() only has to hand its
    # argument back for code written against the Python 2 name.
    def unicode(s):
        return s
else:
    # Python 2: the built-in open() takes no encoding argument, so expose
    # codecs.open under the same name instead.
    from codecs import open
def canonicalId(id):
    """Return the canonical five-digit lowercase hex form of a character id.

    The result is intended for use as a file name.

    id may be:
      * a 1-character string: the character itself, converted with ord();
      * a string of 2 to 5 characters: parsed as a base-16 code point;
      * an int: used directly as the code point.

    Raises ValueError when a string has any other length or cannot be parsed
    as hexadecimal, when id is neither str nor int, or when the resulting
    code point is not in the range 0x10..0xfffff.
    """
    bad_string = "Character id must be a 1-character string with the character itself, or 2-5 hex digit unicode codepoint."

    if isinstance(id, str):
        idLen = len(id)
        if idLen == 1:
            id = ord(id)
        elif 2 <= idLen <= 5:
            try:
                id = int(id, 16)
            except ValueError:
                # Report the expected format rather than int()'s generic
                # "invalid literal" text. No "raise ... from" here because
                # the module still carries a Python 2 code path.
                raise ValueError(bad_string)
        else:
            raise ValueError(bad_string)

    if not isinstance(id, int):
        raise ValueError("canonicalId: id must be int or str")

    # Values up to 0xf are rejected along with anything needing more than
    # five hex digits.
    if 0xf < id <= 0xfffff:
        return "%05x" % (id)
    raise ValueError("Character id out of range")
class SvgFileInfo:
    """Describe one SVG file of a kanji directory, based on its file name.

    Accepted names are "<id>.svg" or "<id>-<variant>.svg", where <id> must
    already be in the form canonicalId() produces. Problems with the name are
    reported through warnings.warn() and recorded in the OK flag; the
    constructor does not raise for them.

    Attributes:
      path     os.path.join(dir, file); always set.
      OK       True when the name passed every check, False otherwise.
      variant  text after the dash; set only when the name has exactly one
               dash, otherwise the attribute does not exist.
      id       text before the dash (or the whole stem); set only when the
               extension check and the dash count check passed.
    """

    def __init__(self, file, dir):
        self.path = os.path.join(dir, file)
        self.OK = True

        if file[-4:].lower() != ".svg":
            warnings.warn("File should have .svg extension. (%s)" % (str(self.path)))
            self.OK = False
            return

        parts = (file[:-4]).split('-')
        if len(parts) == 2:
            self.variant = parts[1]
        elif len(parts) != 1:
            warnings.warn("File should have at most 2 parts separated by a dash. (%s)" % (str(file)))
            self.OK = False
            return

        self.id = parts[0]
        try:
            canonical = canonicalId(self.id)
        except ValueError:
            # canonicalId() raises for stems it cannot interpret at all (wrong
            # length, not hexadecimal, out of range). Such a name is simply
            # not canonical: report it like every other malformed name rather
            # than letting the exception abort the caller's directory scan.
            canonical = None
        if self.id != canonical:
            warnings.warn("File name not in canonical format (%s)" % (str(self.path)))
            self.OK = False
            return

    def __repr__(self):
        # Show whichever attributes were set for this file.
        return repr(vars(self))

    def read(self, SVGHandler=None):
        """Parse the file at self.path and return its single kanji entry.

        SVGHandler is the handler class to instantiate; when None it is
        imported from the kanjivg module. After parsing, handler.kanjis must
        hold exactly one value, otherwise Exception is raised.
        """
        if SVGHandler is None:
            from kanjivg import SVGHandler
        handler = SVGHandler()
        parseXmlFile(self.path, handler)
        parsed = list(handler.kanjis.values())
        if len(parsed) != 1:
            raise Exception("File does not contain 1 kanji entry. (%s)" % (self.path))
        return parsed[0]
def parseXmlFile(path, handler):
    """Run the SAX parser over the XML file at path, feeding events to handler.

    Nothing is returned; results are whatever handler accumulates.
    """
    import xml.sax

    xml.sax.parse(path, handler)
def listSvgFiles(dir):
    """Return a SvgFileInfo for every entry of dir whose name passed its checks.

    Entries come in os.listdir() order; those whose SvgFileInfo has a false
    OK flag are left out.
    """
    candidates = (SvgFileInfo(name, dir) for name in os.listdir(dir))
    return [info for info in candidates if info.OK]
def readXmlFile(path, KanjisHandler=None):
    """Parse the XML file at path and return the handler's kanjis collection.

    KanjisHandler is the handler class to instantiate; when None it is
    imported from the kanjivg module. Raises Exception when parsing leaves
    handler.kanjis without any values.
    """
    handlerClass = KanjisHandler
    if handlerClass is None:
        from kanjivg import KanjisHandler as handlerClass

    handler = handlerClass()
    parseXmlFile(path, handler)

    if not list(handler.kanjis.values()):
        raise Exception("File does not contain any kanji entries. (%s)" % (path))
    return handler.kanjis