-
Notifications
You must be signed in to change notification settings - Fork 83
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
121 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import unicodedata | ||
from dataclasses import dataclass | ||
from pathlib import Path | ||
from typing import Generator, List, Optional, Set, Tuple | ||
|
||
from fontTools.ttLib import TTFont, TTLibFileIsCollectionError # type: ignore | ||
from fontTools.ttLib.ttCollection import TTCollection # type: ignore | ||
|
||
# https://github.com/googlefonts/noto-cjk/raw/main/Sans/Variable/OTC/NotoSansCJK-VF.ttf.ttc | ||
|
||
|
||
@dataclass
class Font:
    """Plain record describing one font face and how it should be used."""

    # Family name of the face (make_fonts fills this from font_family()).
    family: str
    # Name of the face (make_fonts fills this from font_name()).
    name: str
    # Unicode code points, stored as integers, associated with this face.
    codepoints: Set[int]
    # Caller-supplied flag; presumably True for bitmap (e.g. colour emoji)
    # fonts that cannot be scaled freely -- TODO confirm.
    is_bitmap: bool
    # Caller-supplied size; unit is not established here (presumably
    # pixels) -- TODO confirm.
    size: int
|
||
|
||
def make_fonts(path: Path, is_bitmap: bool, size: int) -> Generator[Font, None, None]:
    """Yield one ``Font`` record per face stored in the file at *path*.

    Args:
        path: Font file to open; may be a single font or a collection
            (both are handled by ``get_tt_fonts``).
        is_bitmap: Stored unchanged on every yielded record.
        size: Stored unchanged on every yielded record.

    Yields:
        A ``Font`` whose ``family``/``name`` come from the face's ``name``
        table and whose ``codepoints`` is the set of code points mapped by
        the face's Unicode cmap subtables.

    This is a generator: the file is not opened until iteration starts.
    """
    for tt_font in get_tt_fonts(path):
        # Collect coverage with the same rule can_render() applies: a code
        # point counts if any Unicode cmap subtable maps it. An always-empty
        # set would make the record useless for coverage lookups.
        codepoints: Set[int] = set()
        for table in tt_font["cmap"].tables:
            if table.isUnicode():
                codepoints.update(table.cmap)

        yield Font(
            family=font_family(tt_font),
            name=font_name(tt_font),
            codepoints=codepoints,
            is_bitmap=is_bitmap,
            size=size,
        )
|
||
|
||
def get_tt_fonts(path: Path) -> Generator[TTFont, None, None]:
    """Yield every ``TTFont`` contained in the file at *path*.

    The file is first opened as a single font. If fontTools reports that
    it is a collection (``TTLibFileIsCollectionError``), it is reopened as
    a ``TTCollection`` and each member font is yielded in order.
    """
    try:
        yield TTFont(path)
    except TTLibFileIsCollectionError:
        yield from TTCollection(path).fonts
|
||
|
||
def load_fonts(paths: List[str]) -> List[TTFont]:
    """Eagerly load every font reachable from *paths* into a list.

    Thin wrapper that drains ``fonts_generator``; the result keeps the
    order in which the generator produces the fonts.
    """
    loaded = [*fonts_generator(paths)]
    return loaded
|
||
|
||
def fonts_generator(paths: List[str]) -> Generator[TTFont, None, None]:
    """Yield every ``TTFont`` found in the files listed in *paths*.

    Files are processed in the given order. A file holding a single font
    contributes one ``TTFont``; a collection contributes each of its
    member fonts.

    Args:
        paths: Font file paths to open.

    Yields:
        ``TTFont`` objects, file by file.
    """
    for path in paths:
        # Delegate to get_tt_fonts() so the "single font, else collection"
        # fallback is implemented in exactly one place.
        yield from get_tt_fonts(Path(path))
|
||
|
||
def font_family(font: TTFont) -> str:
    """Return the debug name stored under nameID 1 of *font*'s ``name`` table.

    NameID 1 is the font family name in the OpenType ``name`` table.
    """
    family_name_id = 1
    name_table = font["name"]
    return name_table.getDebugName(family_name_id)
|
||
|
||
def font_name(font: TTFont) -> str:
    """Return the debug name stored under nameID 4 of *font*'s ``name`` table.

    NameID 4 is the full font name in the OpenType ``name`` table.
    """
    full_name_id = 4
    name_table = font["name"]
    return name_table.getDebugName(full_name_id)
|
||
|
||
def can_render(font: TTFont, char: str) -> bool:
    """Tell whether *font* maps the single character *char* to a glyph.

    Only Unicode cmap subtables are consulted; the character counts as
    renderable as soon as one of them contains its code point.
    """
    return any(
        subtable.isUnicode() and ord(char) in subtable.cmap
        for subtable in font["cmap"].tables
    )
|
||
|
||
def get_font_for_char(fonts: List[TTFont], char: str) -> Optional[TTFont]:
    """Return the first font in *fonts* that can render *char*.

    Fonts are tried in list order, so earlier entries take priority.
    ``None`` is returned when no font covers the character.
    """
    matches = (candidate for candidate in fonts if can_render(candidate, char))
    return next(matches, None)
|
||
|
||
def group_by_font(
    text: str, fonts: List[TTFont]
) -> Generator[Tuple[str, TTFont], None, None]:
    """Split *text* into consecutive runs that share the same font.

    Each character is assigned the first font in *fonts* able to render it
    (see ``get_font_for_char``). Adjacent characters with the same font
    are merged into one run.

    Characters that no font can render are reported on stdout and dropped.
    They do not end the current run, so the characters on either side may
    land in the same fragment.

    Args:
        text: String to split; an empty string yields nothing.
        fonts: Candidate fonts in priority order.

    Yields:
        ``(fragment, font)`` pairs in text order, with non-empty fragments.
    """
    run: List[str] = []
    run_font: Optional[TTFont] = None

    for char in text:
        char_font = get_font_for_char(fonts, char)
        # Compare against None explicitly: a font object is container-like,
        # so its truthiness is not a reliable "was a font found" signal.
        if char_font is None:
            print(f"Cannot render char: {char} {char_name(char)} {ord(char)}")
            continue

        # A different font object ends the current run. Identity is the
        # right test because the fonts come straight from the *fonts* list.
        if run_font is not None and char_font is not run_font:
            yield "".join(run), run_font
            run = []

        run_font = char_font
        run.append(char)

    # Flush the trailing run; run_font is set only once a char was kept.
    if run_font is not None:
        yield "".join(run), run_font
|
||
|
||
def char_name(char: str) -> str:
    """Return the Unicode name of *char*, or ``"NO NAME"`` if it has none."""
    return unicodedata.name(char, "NO NAME")
|
||
|
||
# Sample string mixing Latin, emoji, CJK/kana and symbol characters.
text = "Hello 😊! Let's grab a 能ばぜ表聴ゆぞ coffee ☕️ and go for a ride 🚗."

if __name__ == "__main__":
    # (path, is_bitmap, size) for each font file, in loading order.
    font_specs = [
        (Path("NotoSans-Regular.ttf"), False, 20),
        (Path("NotoSansMath-Regular.ttf"), False, 20),
        (Path("NotoSansCJK-Light.ttf"), False, 20),
        (Path("NotoColorEmoji.ttf"), True, 109),
    ]

    # Accumulate every face into one list. Rebinding `fonts` once per file
    # would keep only the last entry, and because make_fonts() is a lazy
    # generator nothing would actually be loaded.
    fonts = [
        font
        for font_path, is_bitmap, size in font_specs
        for font in make_fonts(font_path, is_bitmap, size)
    ]

    # NOTE(review): the disabled snippet below passes `fonts` to
    # group_by_font()/font_name(), which index into font["cmap"] and
    # font["name"]; make_fonts() yields Font records instead, so one side
    # needs adapting before this is re-enabled.
    # with open("chat.200.txt") as f:
    #     while line := f.readline().strip():
    #         for fragment, font in group_by_font(line, fonts):
    #             print(font_name(font).ljust(25), fragment)