Skip to content

Commit

Permalink
Reverse-Walker optimization. (#39 & #41)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShikiSuen authored May 30, 2022
1 parent 8773da1 commit 39105d1
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 62 deletions.
159 changes: 107 additions & 52 deletions Sources/Megrez/1_BlockReadingBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez {
/// 分節讀音槽。
public class BlockReadingBuilder {
/// 給被丟掉的節點路徑施加的負權重。
private let kDroppedPathScore: Double = -999
/// 該分節讀音槽內可以允許的最大詞長。
private var mutMaximumBuildSpanLength = 10
/// 該分節讀音槽的游標位置。
Expand Down Expand Up @@ -144,13 +146,14 @@ extension Megrez {
public func walk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
joinedPhrase: String = "",
longPhrases arrLongPhrases: [String] = .init()
) -> [NodeAnchor] {
Array(
reverseWalk(
at: location, score: accumulatedScore,
nodesLimit: nodesLimit, balanced: balanced
joinedPhrase: joinedPhrase,
longPhrases: arrLongPhrases
).reversed())
}

Expand All @@ -163,72 +166,101 @@ extension Megrez {
public func reverseWalk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
joinedPhrase: String = "",
longPhrases arrLongPhrases: [String] = .init()
) -> [NodeAnchor] {
let location = abs(location) // 防呆
let nodesLimit = abs(nodesLimit) // 防呆
if location == 0 || location > mutGrid.width {
return [] as [NodeAnchor]
return .init()
}

var paths: [[NodeAnchor]] = []
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)
var paths = [[NodeAnchor]]()
var nodes = mutGrid.nodesEndingAt(location: location)
var arrLongPhrases = arrLongPhrases

if balanced {
nodes.sort {
$0.balancedScore > $1.balancedScore
}
nodes = nodes.stableSorted {
$0.scoreForSort > $1.scoreForSort
}

for (i, n) in nodes.enumerated() {
// 只檢查前 X 個 NodeAnchor 是否有 node。
// 這裡有 abs 是為了防止有白癡填負數。
if nodesLimit > 0, i == nodesLimit {
break
if let nodeOfNodeZero = nodes[0].node, nodeOfNodeZero.score >= nodeOfNodeZero.kSelectedCandidateScore {
// 在使用者有選過候選字詞的情況下,摒棄非依此據而成的節點路徑。
var nodeZero = nodes[0]
nodeZero.accumulatedScore = accumulatedScore + nodeOfNodeZero.score
var path: [NodeAnchor] = reverseWalk(at: location - nodeZero.spanningLength, score: nodeZero.accumulatedScore)
path.insert(nodeZero, at: 0)
paths.append(path)
} else if arrLongPhrases.count > 0 {
var path = [NodeAnchor]()
for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
var theAnchor = theAnchor
let joinedValue = theNode.currentKeyValue.value + joinedPhrase
// 如果只是一堆單漢字的節點組成了同樣的長詞的話,直接棄用這個節點路徑。
// 打比方說「八/月/中/秋/山/林/涼」與「八月/中秋/山林/涼」在使用者來看
// 是「結果等價」的,那就扔掉前者。
if arrLongPhrases.contains(joinedValue) {
theAnchor.accumulatedScore = kDroppedPathScore
path.insert(theAnchor, at: 0)
paths.append(path)
continue
}
theAnchor.accumulatedScore = accumulatedScore + theNode.score
if joinedValue.count >= arrLongPhrases[0].count {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init())
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: joinedValue,
longPhrases: arrLongPhrases)
}
path.insert(theAnchor, at: 0)
paths.append(path)
}

var n = n
guard let nNode = n.node else {
continue
} else {
// 看看當前格位有沒有更長的候選字詞。
var arrLongPhrasesNeo = [String]()
for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
if theAnchor.spanningLength > 1 {
arrLongPhrases.append(theNode.currentKeyValue.value)
}
}

n.accumulatedScore = accumulatedScore + nNode.score

// 利用幅位長度來決定權重。
// 這樣一來,例:「再見」比「在」與「見」的權重更高。
if balanced {
n.accumulatedScore += n.additionalWeights
arrLongPhrasesNeo = arrLongPhrasesNeo.stableSorted {
$0.count > $1.count
}

var path: [NodeAnchor] = reverseWalk(
at: location - n.spanningLength,
score: n.accumulatedScore
)

path.insert(n, at: 0)

paths.append(path)

// 始終使用固定的候選字詞
if balanced, nNode.score >= 0 {
break
for theAnchor in nodes {
var theAnchor = theAnchor
guard let theNode = theAnchor.node else { continue }
theAnchor.accumulatedScore = accumulatedScore + theNode.score
var path = [NodeAnchor]()
if theAnchor.spanningLength > 1 {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init())
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore,
joinedPhrase: theNode.currentKeyValue.value, longPhrases: arrLongPhrasesNeo)
}
path.insert(theAnchor, at: 0)
paths.append(path)
}
}

if !paths.isEmpty {
if var result = paths.first {
for value in paths {
if let vLast = value.last, let rLast = result.last {
if vLast.accumulatedScore > rLast.accumulatedScore {
result = value
}
}
}
return result
guard !paths.isEmpty else {
return .init()
}

var result: [NodeAnchor] = paths[0]
for neta in paths {
if neta.last!.accumulatedScore > result.last!.accumulatedScore {
result = neta
}
}
return [] as [NodeAnchor]

return result
}

// MARK: - Private functions
Expand Down Expand Up @@ -266,3 +298,26 @@ extension Megrez {
}
}
}

// MARK: - Stable Sort Extension

// Reference: https://stackoverflow.com/a/50545761/4162914

extension Sequence {
  /// Returns the elements of the sequence sorted with the given predicate,
  /// keeping elements that compare as equivalent in their original relative order.
  ///
  /// Each element is paired with its position in the sequence; whenever the
  /// predicate orders neither of two elements before the other, the one that
  /// appeared earlier in the sequence is placed first.
  ///
  /// - Parameter areInIncreasingOrder: A predicate that returns `true` when its
  ///   first argument should be ordered before its second argument, and `false`
  ///   otherwise (including when the two are considered equal).
  /// - Returns: An array holding the elements in stable sorted order.
  /// - Throws: Rethrows any error thrown by `areInIncreasingOrder`.
  func stableSorted(
    by areInIncreasingOrder: (Element, Element) throws -> Bool
  ) rethrows -> [Element] {
    let indexedElements = enumerated()
    let ordered = try indexedElements.sorted { lhs, rhs -> Bool in
      // Strictly ordered by the caller's predicate: nothing more to decide.
      if try areInIncreasingOrder(lhs.element, rhs.element) {
        return true
      }
      // Only an element that appeared earlier may win a tie.
      guard lhs.offset < rhs.offset else {
        return false
      }
      // It is a tie only if the reverse comparison does not hold either.
      let reverseHolds = try areInIncreasingOrder(rhs.element, lhs.element)
      return !reverseHolds
    }
    return ordered.map { $0.element }
  }
}
11 changes: 3 additions & 8 deletions Sources/Megrez/3_NodeAnchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,9 @@ extension Megrez {
return stream
}

/// 獲取加權量。
public var additionalWeights: Double {
(Double(spanningLength) - 1) * 0.75
}

/// 獲取平衡權重。
public var balancedScore: Double {
(node?.score ?? 0) + additionalWeights
/// 獲取用來比較的權重。
public var scoreForSort: Double {
node?.score ?? 0
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/Megrez/4_Node.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ extension Megrez {
/// 用來登記「當前選中的單元圖」的索引值的變數。
private var mutSelectedUnigramIndex: Int = 0
/// 用來登記要施加給「『被標記為選中狀態』的候選字詞」的複寫權重的數值。
private let kSelectedCandidateScore: Double = 99
public let kSelectedCandidateScore: Double = 99
/// 將當前節點列印成一個字串。
public var description: String {
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
Expand Down
2 changes: 1 addition & 1 deletion Tests/MegrezTests/MegrezTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ final class MegrezTests: XCTestCase {
var walked = [Megrez.NodeAnchor]()

func walk(at location: Int) {
walked = builder.walk(at: location, score: 0.0, nodesLimit: 3, balanced: true)
walked = builder.walk(at: location, score: 0.0)
}

// 模擬輸入法的行為,每次敲字或選字都重新 walk。
Expand Down

0 comments on commit 39105d1

Please sign in to comment.