Skip to content

Commit

Permalink
enhance: Remove raw tokenizer registration.
Browse files Browse the repository at this point in the history
Tantivy already registers the raw tokenizer (under the name "raw") by default.

Signed-off-by: sunby <[email protected]>
  • Loading branch information
sunby committed Nov 21, 2024
1 parent b983ef9 commit 6784869
Showing 1 changed file with 1 addition and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ impl IndexWriterWrapper {

let field: Field;
let mut schema_builder = Schema::builder();
let mut use_raw_tokenizer = false;
match data_type {
TantivyDataType::I64 => {
field = schema_builder.add_i64_field(&field_name, INDEXED);
Expand All @@ -45,11 +44,10 @@ impl IndexWriterWrapper {
}
TantivyDataType::Keyword => {
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("raw_tokenizer")
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic);
let text_options = TextOptions::default().set_indexing_options(text_field_indexing);
field = schema_builder.add_text_field(&field_name, text_options);
use_raw_tokenizer = true;
}
TantivyDataType::Text => {
panic!("text should be indexed with analyzer");
Expand All @@ -58,11 +56,6 @@ impl IndexWriterWrapper {
let id_field = schema_builder.add_i64_field("doc_id", FAST);
let schema = schema_builder.build();
let index = Index::create_in_dir(path.clone(), schema).unwrap();
if use_raw_tokenizer {
index
.tokenizers()
.register("raw_tokenizer", tokenizer::RawTokenizer::default());
}
let index_writer = index
.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)
.unwrap();
Expand Down

0 comments on commit 6784869

Please sign in to comment.