Skip to content

Commit 9c75942

Browse files
authored
fix merge panic for JSON fields (#2284)
The root cause was that the positions buffer retained residual positions from the previous term when consecutive terms in a JSON field alternated between having and not having positions (text terms have positions, numeric terms do not). Fixes #2283
1 parent bff7c58 commit 9c75942

File tree

3 files changed

+32
-0
lines changed

3 files changed

+32
-0
lines changed

src/indexer/index_writer.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,7 @@ mod tests {
16511651
force_end_merge: bool,
16521652
) -> crate::Result<Index> {
16531653
let mut schema_builder = schema::Schema::builder();
1654+
let json_field = schema_builder.add_json_field("json", FAST | TEXT | STORED);
16541655
let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED);
16551656
let ips_field = schema_builder
16561657
.add_ip_addr_field("ips", IpAddrOptions::default().set_fast().set_indexed());
@@ -1729,7 +1730,9 @@ mod tests {
17291730
id_field=>id,
17301731
))?;
17311732
} else {
1733+
let json = json!({"date1": format!("2022-{id}-01T00:00:01Z"), "date2": format!("{id}-05-01T00:00:01Z"), "id": id, "ip": ip.to_string()});
17321734
index_writer.add_document(doc!(id_field=>id,
1735+
json_field=>json,
17331736
bytes_field => id.to_le_bytes().as_slice(),
17341737
id_opt_field => id,
17351738
ip_field => ip,

src/indexer/merger.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,10 @@ impl IndexMerger {
605605
segment_postings.positions(&mut positions_buffer);
606606
segment_postings.term_freq()
607607
} else {
608+
// The positions_buffer may still contain positions from the previous term:
609+
// in JSON fields, whether a term has positions depends on its value type.
610+
// https://github.com/quickwit-oss/tantivy/issues/2283
611+
positions_buffer.clear();
608612
0u32
609613
};
610614

src/indexer/segment_writer.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,31 @@ mod tests {
879879
assert_eq!(searcher.search(&phrase_query, &Count).unwrap(), 0);
880880
}
881881

882+
#[test]
883+
fn test_json_term_with_numeric_merge_panic_regression_bug_2283() {
884+
// https://github.com/quickwit-oss/tantivy/issues/2283
885+
let mut schema_builder = Schema::builder();
886+
let json = schema_builder.add_json_field("json", TEXT);
887+
let schema = schema_builder.build();
888+
let index = Index::create_in_ram(schema);
889+
let mut writer = index.writer_for_tests().unwrap();
890+
let doc = json!({"field": "a"});
891+
writer.add_document(doc!(json=>doc)).unwrap();
892+
writer.commit().unwrap();
893+
let doc = json!({"field": "a", "id": 1});
894+
writer.add_document(doc!(json=>doc.clone())).unwrap();
895+
writer.commit().unwrap();
896+
897+
// Force Merge
898+
writer.wait_merging_threads().unwrap();
899+
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
900+
let segment_ids = index
901+
.searchable_segment_ids()
902+
.expect("Searchable segments failed.");
903+
index_writer.merge(&segment_ids).wait().unwrap();
904+
assert!(index_writer.wait_merging_threads().is_ok());
905+
}
906+
882907
#[test]
883908
fn test_bug_regression_1629_position_when_array_with_a_field_value_that_does_not_contain_any_token(
884909
) {

0 commit comments

Comments
 (0)