Skip to content

Commit

Permalink
1.2.9 // Structural optimizations. (#72)
Browse files Browse the repository at this point in the history
* Compositor // Update things related to walkedAnchors().

* Node // Add spanLength property.

* NodeAnchor // Structural optimization.

* Fix fixNodeWithCandidate(), which has been malfunctioning since v1.2.8.

* Sync fixes from MegrezNT v1.2.9 update.

* NodeAnchor // Remove KeyLength.
  • Loading branch information
ShikiSuen authored Aug 1, 2022
1 parent 3c152fb commit 0af1710
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 70 deletions.
26 changes: 17 additions & 9 deletions Sources/Megrez/1_Compositor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez {
/// 組字器。
public class Compositor: Grid {
/// 文字輸入方向
/// 就文字輸入方向而言的方向。
public enum TypingDirection { case front, rear }
/// 給被丟掉的節點路徑施加的負權重。
private let kDroppedPathScore: Double = -999
Expand All @@ -38,7 +38,14 @@ extension Megrez {
private var langModel: LangModelProtocol
/// 允許查詢當前游標位置屬於第幾個幅位座標(從 0 開始算)。
private(set) var cursorRegionMap: [Int: Int] = .init()
private(set) var walkedAnchors: [Megrez.NodeAnchor] = [] // 用以記錄爬過的節錨的陣列
/// 用以記錄爬過的節錨的陣列。
private(set) var walkedAnchors: [NodeAnchor] = []

/// 該函式用以更新爬過的節錨的陣列。
/// - Parameter nodes: 傳入的節點陣列。
public func updateWalkedAnchors(with nodes: [Node]) {
walkedAnchors = nodes.map { Megrez.NodeAnchor(node: $0) }
}

/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。
public var joinSeparator: String = "-"
Expand All @@ -47,7 +54,7 @@ extension Megrez {
public var length: Int { readings.count }

/// 按幅位來前後移動游標。
/// - Parameter direction: 移動方向
/// - Parameter direction: 移動方向
/// - Returns: 該操作是否順利完成。
@discardableResult public func jumpCursorBySpan(to direction: TypingDirection) -> Bool {
switch direction {
Expand All @@ -64,8 +71,9 @@ extension Megrez {
case currentRegionBorderRear:
switch direction {
case .front:
if currentRegion > walkedAnchors.count { cursor = readings.count }
else { cursor = walkedAnchors[0...currentRegion].map(\.spanLength).reduce(0, +) }
cursor =
(currentRegion > walkedAnchors.count)
? readings.count : walkedAnchors[0...currentRegion].map(\.spanLength).reduce(0, +)
case .rear:
cursor = walkedAnchors[0..<aRegionForward].map(\.spanLength).reduce(0, +)
}
Expand All @@ -87,7 +95,7 @@ extension Megrez {
/// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。
public init(lm: LangModelProtocol, length: Int = 10, separator: String = "-") {
langModel = lm
super.init(spanLength: abs(length)) // 防呆
super.init(spanLengthLimit: abs(length)) // 防呆
joinSeparator = separator
}

Expand Down Expand Up @@ -180,7 +188,7 @@ extension Megrez {

var paths = [[NodeAnchor]]()
let nodes = nodesEndingAt(location: location).stableSorted {
$0.scoreForSort > $1.scoreForSort
$0.node.score > $1.node.score
}

guard !nodes.isEmpty else { return .init() } // 防止下文出現範圍外索引的錯誤
Expand Down Expand Up @@ -269,7 +277,7 @@ extension Megrez {
if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue }
let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading)
if unigrams.isEmpty { continue }
let n = Node(key: combinedReading, unigrams: unigrams)
let n: Node = .init(key: combinedReading, spanLength: q, unigrams: unigrams)
insertNode(node: n, location: p, spanLength: q)
}
}
Expand All @@ -281,6 +289,7 @@ extension Megrez {

internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) {
var cursorRegionMapDict = [Int: Int]()
cursorRegionMapDict[-1] = 0 // 防呆
var counter = 0
for (i, anchor) in anchors.enumerated() {
for _ in 0..<anchor.spanLength {
Expand All @@ -289,7 +298,6 @@ extension Megrez {
}
}
cursorRegionMapDict[counter] = anchors.count
cursorRegionMapDict[-1] = 0 // 防呆
cursorRegionMap = cursorRegionMapDict
}
}
Expand Down
34 changes: 8 additions & 26 deletions Sources/Megrez/2_Grid.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ extension Megrez {
public var isEmpty: Bool { spans.isEmpty }

/// 初期化軌格。
public init(spanLength: Int = 10) {
maxBuildSpanLength = spanLength
public init(spanLengthLimit: Int = 10) {
maxBuildSpanLength = spanLengthLimit
spans = [Megrez.SpanUnit]()
}

Expand Down Expand Up @@ -98,7 +98,7 @@ extension Megrez {
spans.remove(at: location)
}
for i in 0..<location {
// zaps overlapping spans
// 處理掉被損毀的或者重複的幅位。
spans[i].dropNodesBeyond(length: location - i)
}
}
Expand All @@ -114,13 +114,7 @@ extension Megrez {
let span = spans[location]
for i in 1...maxBuildSpanLength {
if let np = span.nodeOf(length: i) {
results.append(
.init(
node: np,
location: location,
spanLength: i
)
)
results.append(.init(node: np))
}
}
return results // 已證實不會有空節點產生。
Expand All @@ -137,13 +131,7 @@ extension Megrez {
let span = spans[i]
if i + span.maxLength < location { continue }
if let np = span.nodeOf(length: location - i) {
results.append(
.init(
node: np,
location: i,
spanLength: location - i
)
)
results.append(.init(node: np))
}
}
return results // 已證實不會有空節點產生。
Expand All @@ -162,13 +150,7 @@ extension Megrez {
for j in 1...span.maxLength {
if i + j < location { continue }
if let np = span.nodeOf(length: j) {
results.append(
.init(
node: np,
location: i,
spanLength: location - i
)
)
results.append(.init(node: np))
}
}
}
Expand All @@ -193,7 +175,7 @@ extension Megrez {
@discardableResult public func fixNodeWithCandidateLiteral(_ value: String, at location: Int) -> NodeAnchor {
let location = abs(location) // 防呆
var node = NodeAnchor()
for theAnchor in nodesOverlappedAt(location: location) {
for theAnchor in nodesCrossingOrEndingAt(location: location) {
let candidates = theAnchor.node.candidates
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
theAnchor.node.resetCandidate()
Expand All @@ -217,7 +199,7 @@ extension Megrez {
@discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor {
let location = abs(location) // 防呆
var node = NodeAnchor()
for theAnchor in nodesOverlappedAt(location: location) {
for theAnchor in nodesCrossingOrEndingAt(location: location) {
let candidates = theAnchor.node.candidates
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
theAnchor.node.resetCandidate()
Expand Down
30 changes: 16 additions & 14 deletions Sources/Megrez/3_NodeAnchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,35 @@ extension Megrez {
public var isEmpty: Bool { node.key.isEmpty }
/// 節點。一個節錨內不一定有節點。
public var node: Node = .init()
/// 節锚所在的位置。
public var location: Int = 0
/// 指定的幅位長度。
public var spanLength: Int = 0
public var spanLength: Int { node.spanLength }
/// 獲取用來比較的權重。
public var scoreForSort: Double { node.score }
/// 累計權重。
public var mass: Double = 0.0
/// 索引鍵的長度。
public var keyLength: Int {
isEmpty ? node.key.count : 0
/// 單元圖陣列。
public var unigrams: [Unigram] { node.unigrams }
/// 雙元圖陣列。
public var bigrams: [Bigram] { node.bigrams }
/// 鍵。
public var key: String { node.key }

/// 初期化一個節錨。
public init(node: Node = .init(), mass: Double? = nil) {
self.node = node
self.mass = mass ?? self.node.score
}

/// 將該節錨雜湊化。
public func hash(into hasher: inout Hasher) {
hasher.combine(node)
hasher.combine(location)
hasher.combine(spanLength)
hasher.combine(mass)
}

/// 將當前節錨列印成一個字串。
public var description: String {
var stream = ""
stream += "{@(" + String(location) + "," + String(spanLength) + "),"
stream += "{@(" + String(spanLength) + "),"
if node.key.isEmpty {
stream += node.description
} else {
Expand All @@ -60,11 +67,6 @@ extension Megrez {
stream += "}"
return stream
}

/// 獲取用來比較的權重。
public var scoreForSort: Double {
isEmpty ? node.score : 0
}
}
}

Expand Down
12 changes: 8 additions & 4 deletions Sources/Megrez/4_Node.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@ extension Megrez {
lhs.key == rhs.key && lhs.score == rhs.score && lhs.unigrams == rhs.unigrams && lhs.bigrams == rhs.bigrams
&& lhs.candidates == rhs.candidates && lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap
&& lhs.precedingBigramMap == rhs.precedingBigramMap && lhs.isCandidateFixed == rhs.isCandidateFixed
&& lhs.selectedUnigramIndex == rhs.selectedUnigramIndex
&& lhs.selectedUnigramIndex == rhs.selectedUnigramIndex && lhs.spanLength == rhs.spanLength
}

public func hash(into hasher: inout Hasher) {
hasher.combine(key)
hasher.combine(score)
hasher.combine(unigrams)
hasher.combine(bigrams)
hasher.combine(spanLength)
hasher.combine(candidates)
hasher.combine(valueUnigramIndexMap)
hasher.combine(precedingBigramMap)
Expand All @@ -50,9 +51,11 @@ extension Megrez {
/// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。
private(set) var score: Double = 0
/// 單元圖陣列。
private var unigrams: [Unigram]
private(set) var unigrams: [Unigram]
/// 雙元圖陣列。
private var bigrams: [Bigram]
private(set) var bigrams: [Bigram]
/// 指定的幅位長度。
public var spanLength: Int = 0
/// 候選字詞陣列,以鍵值陣列的形式存在。
private(set) var candidates: [KeyValuePaired] = []
/// 專門「用單元圖資料值來調查索引值」的辭典。
Expand Down Expand Up @@ -83,10 +86,11 @@ extension Megrez {
/// - key: 索引鍵。
/// - unigrams: 單元圖陣列。
/// - bigrams: 雙元圖陣列(非必填)。
public init(key: String = "", unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
public init(key: String = "", spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
self.key = key
self.unigrams = unigrams
self.bigrams = bigrams
self.spanLength = spanLength

self.unigrams.sort {
$0.score > $1.score
Expand Down
4 changes: 2 additions & 2 deletions Sources/Megrez/5_LanguageModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public protocol LangModelProtocol {
func unigramsFor(key: String) -> [Megrez.Unigram]

/// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。
func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram]
func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram]

/// 給定鍵,確認是否有單元圖記錄在庫。
func hasUnigramsFor(key: String) -> Bool
Expand All @@ -47,7 +47,7 @@ extension Megrez {
}

/// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
open func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
}

Expand Down
4 changes: 2 additions & 2 deletions Tests/MegrezTests/LMDataForTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import Megrez
// MARK: - 用以測試的語言模型(簡單範本型)

class SimpleLM: LangModelProtocol {
func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] {
func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] {
.init()
}

Expand Down Expand Up @@ -65,7 +65,7 @@ class SimpleLM: LangModelProtocol {
}

class MockLM: LangModelProtocol {
func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] {
func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] {
.init()
}

Expand Down
Loading

0 comments on commit 0af1710

Please sign in to comment.