Skip to content

Commit

Permalink
Bump version to v1.1.0
Browse files Browse the repository at this point in the history
Update README
  • Loading branch information
laubonghaudoi committed Jul 6, 2024
1 parent 03c8295 commit 629e515
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 15 deletions.
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ pip install cantonesedetect
用下面嘅方法創建一個 `Detector`,然後直接調用 `judge()`就可以得到分類結果:

```python
from cantonesedetect import Detector
from cantonesedetect import CantoneseDetector

# 默認情況下 use_quotes=False, split_seg=False, get_analysis=False
detector = Detector()
detector = CantoneseDetector()

detector.judge('你喺邊度') # cantonese
detector.judge('你在哪裏') # swc
Expand All @@ -95,14 +95,23 @@ detector = Detector(use_quotes=True, split_seg=True, get_analysis=True)
judgement, document_features = detector.judge("他説:「我哋今晚食飯。你想去邊度食?」")

# 打印分析結果
# Print analysis results
print(document_features.get_analysis())
# 打印匹配結果
print(document_features.document_segments_features)

# `document_features` 入面有每個分句嘅 `document_segments_features` 同 `document_segments_judgements`
# `document_features` object contains `document_segments_features` which is a list of segment features
print(document_features.document_segments_features[0].canto_feature)
print(document_features.document_segments_features[0].canto_exclude)
print(document_features.document_segments_features[0].swc_feature)
print(document_features.document_segments_features[0].swc_exclude)
# Also contains `document_segments_judgements` which is a list of judgements of the segments
print([j.value for j in document_features.document_segments_judgements])
```

### CLI

如果直接喺 CLI 調用嘅話,只需要指明 `--input` 就得。`--quotes`、`--split`、`--print_analysis` 三個參數都默認關閉,如果標明就會打開:

```bash
cantonesedetect --input input.txt
# 開啓引號抽取判別、分句判別並且打印分析結果
Expand Down
8 changes: 4 additions & 4 deletions cantonesedetect/Detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from collections import Counter
from typing import List, Tuple, Optional

from cantonesedetect.DocumentFeatures import DocumentFeatures
from cantonesedetect.SegmentFeatures import SegmentFeatures
from cantonesedetect.JudgementTypes import JudgementType
from .DocumentFeatures import DocumentFeatures
from .SegmentFeatures import SegmentFeatures
from .JudgementTypes import JudgementType

# Cantonese characters not found in SWC
CANTO_FEATURE_RE = re.compile(
Expand Down Expand Up @@ -126,7 +126,7 @@ def _get_segment_features(self, segment: str) -> SegmentFeatures:
segment (str): The segment of text to be analyzed.
Returns:
None
segment_features (SegmentFeatures): The features of the segment.
"""
canto_feature = CANTO_FEATURE_RE.findall(segment)
canto_exclude = CANTO_EXCLUDE_RE.findall(segment)
Expand Down
1 change: 1 addition & 0 deletions cantonesedetect/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .Detector import CantoneseDetector
6 changes: 2 additions & 4 deletions cantonesedetect/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
import sys
from cantonesedetect.Detector import CantoneseDetector
from cantonesedetect import CantoneseDetector

sys.stdout.reconfigure(encoding='utf-8')

Expand All @@ -14,8 +14,6 @@ def main():

argparser.add_argument('--input', type=str, default='input.txt',
help='Specify input text file, where each line is a sentence. Default is `input.txt`.')
argparser.add_argument('--mode', type=str, default='judgement',
help='Specify the mode of output.\n `judgement` for all judgements with a class label, `full` for all the labels prepended to the sentences, Default is `judgement`.')
argparser.add_argument(
'--quotes', help='Separate quotes from matrix and judge them separately.', action='store_true')
argparser.add_argument(
Expand All @@ -32,7 +30,7 @@ def main():
for line in f:
judgement, document_features = detector.judge(line.strip())
analysis = document_features.get_analysis()
sys.stdout.write(f"====================================\nJUDGEMENT: {
sys.stdout.write(f"====================================\nINPUT:{line.strip()}\nJUDGEMENT: {
judgement.value}\n")
sys.stdout.write(analysis)
else:
Expand Down
2 changes: 1 addition & 1 deletion cantonesedetect/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0"
__version__ = "1.1.0"
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from setuptools import setup, find_packages
from pathlib import Path

# read the contents of your README file
from pathlib import Path
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()

# Import version from version.py
from cantonesedetect.version import __version__

setup(
name='cantonesedetect',
version='1.0',
version=__version__,
description='A minimal package that detect Cantonese sentences in Traditional Chinese text.',
author='Chaak Ming Lau, Mingfei Lau and Ann Wai Huen To',
packages=find_packages(),
Expand Down

0 comments on commit 629e515

Please sign in to comment.