Skip to content

Commit 1cd0628

Browse files
uklotzde and PSeitz
authored and committed
Replace chrono with time (#1307)
For date values `chrono` has been replaced with `time` - The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`. - The type alias `tantivy::DateTime` has been removed. - `Value::Date` wraps `time::PrimitiveDateTime` without time zone information. - Internally date/time values are stored as seconds since UNIX epoch in UTC. - Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC. If this is not desired, do the time zone conversion yourself and use `time::PrimitiveDateTime` directly instead. Closes #1304
1 parent f5056d4 commit 1cd0628

20 files changed

+323
-190
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
Unreleased
2+
================================
3+
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304:
4+
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
5+
- The type alias `tantivy::DateTime` has been removed.
6+
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
7+
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
8+
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
9+
If this is not desired, do the time zone conversion yourself and use `time::PrimitiveDateTime`
10+
directly instead.
11+
112
Tantivy 0.17
213
================================
314
- LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ thiserror = "1.0.24"
4848
htmlescape = "0.3.1"
4949
fail = "0.5"
5050
murmurhash32 = "0.2"
51-
chrono = "0.4.19"
51+
time = { version = "0.3.7", features = ["serde-well-known"] }
5252
smallvec = "1.6.1"
5353
rayon = "1.5"
5454
lru = "0.7.0"

query-grammar/src/query_grammar.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
6767
///
6868
/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
6969
/// We delegate rejecting such invalid dates to the logical AST computation code
70-
/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
70+
/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
7171
/// it (instead of merely extracting the datetime value as string as done here).
7272
fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
7373
let two_digits = || recognize::<String, _, _>((digit(), digit()));

src/collector/histogram_collector.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,9 @@ mod tests {
152152
use query::AllQuery;
153153

154154
use super::{add_vecs, HistogramCollector, HistogramComputer};
155-
use crate::chrono::{TimeZone, Utc};
156155
use crate::schema::{Schema, FAST};
157-
use crate::{doc, query, Index};
156+
use crate::time::{Date, Month};
157+
use crate::{doc, query, DateTime, Index};
158158

159159
#[test]
160160
fn test_add_histograms_simple() {
@@ -273,16 +273,20 @@ mod tests {
273273
let schema = schema_builder.build();
274274
let index = Index::create_in_ram(schema);
275275
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
276-
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
277-
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
278-
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
276+
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
277+
writer.add_document(
278+
doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1986, Month::March, 9)?.with_hms(0, 0, 0)?)),
279+
)?;
280+
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
279281
writer.commit()?;
280282
let reader = index.reader()?;
281283
let searcher = reader.searcher();
282284
let all_query = AllQuery;
283285
let week_histogram_collector = HistogramCollector::new(
284286
date_field,
285-
Utc.ymd(1980, 1, 1).and_hms(0, 0, 0),
287+
DateTime::new_primitive(
288+
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
289+
),
286290
3600 * 24 * 365, // it is just for a unit test... sorry leap years.
287291
10,
288292
);

src/collector/tests.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
use std::str::FromStr;
2-
31
use super::*;
42
use crate::collector::{Count, FilterCollector, TopDocs};
53
use crate::core::SegmentReader;
64
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
75
use crate::query::{AllQuery, QueryParser};
86
use crate::schema::{Field, Schema, FAST, TEXT};
7+
use crate::time::format_description::well_known::Rfc3339;
8+
use crate::time::OffsetDateTime;
99
use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};
1010

1111
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
@@ -26,11 +26,11 @@ pub fn test_filter_collector() -> crate::Result<()> {
2626
let index = Index::create_in_ram(schema);
2727

2828
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
29-
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
30-
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
31-
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
32-
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
33-
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
29+
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::new_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
30+
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
31+
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;
32+
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
33+
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::new_utc(OffsetDateTime::parse("2018-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
3434
index_writer.commit()?;
3535

3636
let reader = index.reader()?;
@@ -55,7 +55,9 @@ pub fn test_filter_collector() -> crate::Result<()> {
5555
assert_eq!(filtered_top_docs.len(), 0);
5656

5757
fn date_filter(value: DateTime) -> bool {
58-
(value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
58+
(value.to_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
59+
.whole_weeks()
60+
> 0
5961
}
6062

6163
let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));

src/collector/top_score_collector.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,9 @@ mod tests {
714714
use crate::collector::Collector;
715715
use crate::query::{AllQuery, Query, QueryParser};
716716
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
717-
use crate::{DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
717+
use crate::time::format_description::well_known::Rfc3339;
718+
use crate::time::OffsetDateTime;
719+
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
718720

719721
fn make_index() -> crate::Result<Index> {
720722
let mut schema_builder = Schema::builder();
@@ -890,28 +892,32 @@ mod tests {
890892

891893
#[test]
892894
fn test_top_field_collector_datetime() -> crate::Result<()> {
893-
use std::str::FromStr;
894895
let mut schema_builder = Schema::builder();
895896
let name = schema_builder.add_text_field("name", TEXT);
896897
let birthday = schema_builder.add_date_field("birthday", FAST);
897898
let schema = schema_builder.build();
898899
let index = Index::create_in_ram(schema);
899900
let mut index_writer = index.writer_for_tests()?;
900-
let pr_birthday = crate::DateTime::from_str("1898-04-09T00:00:00+00:00")?;
901+
let pr_birthday = DateTime::new_utc(OffsetDateTime::parse(
902+
"1898-04-09T00:00:00+00:00",
903+
&Rfc3339,
904+
)?);
901905
index_writer.add_document(doc!(
902906
name => "Paul Robeson",
903-
birthday => pr_birthday
907+
birthday => pr_birthday,
904908
))?;
905-
let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
909+
let mr_birthday = DateTime::new_utc(OffsetDateTime::parse(
910+
"1947-11-08T00:00:00+00:00",
911+
&Rfc3339,
912+
)?);
906913
index_writer.add_document(doc!(
907914
name => "Minnie Riperton",
908-
birthday => mr_birthday
915+
birthday => mr_birthday,
909916
))?;
910917
index_writer.commit()?;
911918
let searcher = index.reader()?.searcher();
912919
let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
913-
let top_docs: Vec<(crate::DateTime, DocAddress)> =
914-
searcher.search(&AllQuery, &top_collector)?;
920+
let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
915921
assert_eq!(
916922
&top_docs[..],
917923
&[

src/error.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,21 @@ impl<Guard> From<PoisonError<Guard>> for TantivyError {
149149
}
150150
}
151151

152-
impl From<chrono::ParseError> for TantivyError {
153-
fn from(err: chrono::ParseError) -> TantivyError {
154-
TantivyError::InvalidArgument(err.to_string())
152+
impl From<time::error::Format> for TantivyError {
153+
fn from(err: time::error::Format) -> TantivyError {
154+
TantivyError::InvalidArgument(format!("Date formatting error: {err}"))
155+
}
156+
}
157+
158+
impl From<time::error::Parse> for TantivyError {
159+
fn from(err: time::error::Parse) -> TantivyError {
160+
TantivyError::InvalidArgument(format!("Date parsing error: {err}"))
161+
}
162+
}
163+
164+
impl From<time::error::ComponentRange> for TantivyError {
165+
fn from(err: time::error::ComponentRange) -> TantivyError {
166+
TantivyError::InvalidArgument(format!("Date range error: {err}"))
155167
}
156168
}
157169

src/fastfield/mod.rs

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ pub use self::readers::FastFieldReaders;
3030
pub(crate) use self::readers::{type_and_cardinality, FastType};
3131
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
3232
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
33-
use crate::chrono::{NaiveDateTime, Utc};
3433
use crate::schema::{Cardinality, FieldType, Type, Value};
35-
use crate::DocId;
34+
use crate::{DateTime, DocId};
3635

3736
mod alive_bitset;
3837
mod bytes;
@@ -161,14 +160,14 @@ impl FastValue for f64 {
161160
}
162161
}
163162

164-
impl FastValue for crate::DateTime {
163+
impl FastValue for DateTime {
165164
fn from_u64(timestamp_u64: u64) -> Self {
166-
let timestamp_i64 = i64::from_u64(timestamp_u64);
167-
crate::DateTime::from_utc(NaiveDateTime::from_timestamp(timestamp_i64, 0), Utc)
165+
let unix_timestamp = i64::from_u64(timestamp_u64);
166+
Self::from_unix_timestamp(unix_timestamp)
168167
}
169168

170169
fn to_u64(&self) -> u64 {
171-
self.timestamp().to_u64()
170+
self.to_unix_timestamp().to_u64()
172171
}
173172

174173
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
@@ -179,7 +178,7 @@ impl FastValue for crate::DateTime {
179178
}
180179

181180
fn as_u64(&self) -> u64 {
182-
self.timestamp().as_u64()
181+
self.to_unix_timestamp().as_u64()
183182
}
184183

185184
fn to_type() -> Type {
@@ -188,12 +187,12 @@ impl FastValue for crate::DateTime {
188187
}
189188

190189
fn value_to_u64(value: &Value) -> u64 {
191-
match *value {
192-
Value::U64(ref val) => *val,
193-
Value::I64(ref val) => common::i64_to_u64(*val),
194-
Value::F64(ref val) => common::f64_to_u64(*val),
195-
Value::Date(ref datetime) => common::i64_to_u64(datetime.timestamp()),
196-
_ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
190+
match value {
191+
Value::U64(val) => val.to_u64(),
192+
Value::I64(val) => val.to_u64(),
193+
Value::F64(val) => val.to_u64(),
194+
Value::Date(val) => val.to_u64(),
195+
_ => panic!("Expected a u64/i64/f64/date field, got {:?} ", value),
197196
}
198197
}
199198

@@ -213,6 +212,7 @@ mod tests {
213212
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
214213
use crate::merge_policy::NoMergePolicy;
215214
use crate::schema::{Document, Field, NumericOptions, Schema, FAST};
215+
use crate::time::OffsetDateTime;
216216
use crate::{Index, SegmentId, SegmentReader};
217217

218218
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
@@ -233,7 +233,7 @@ mod tests {
233233

234234
#[test]
235235
pub fn test_fastfield_i64_u64() {
236-
let datetime = crate::DateTime::from_utc(NaiveDateTime::from_timestamp(0i64, 0), Utc);
236+
let datetime = DateTime::new_utc(OffsetDateTime::UNIX_EPOCH);
237237
assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
238238
}
239239

@@ -489,7 +489,8 @@ mod tests {
489489
let index = Index::create_in_ram(schema);
490490
let mut index_writer = index.writer_for_tests().unwrap();
491491
index_writer.set_merge_policy(Box::new(NoMergePolicy));
492-
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
492+
index_writer
493+
.add_document(doc!(date_field =>DateTime::new_utc(OffsetDateTime::now_utc())))?;
493494
index_writer.commit()?;
494495
index_writer.add_document(doc!())?;
495496
index_writer.commit()?;
@@ -509,7 +510,7 @@ mod tests {
509510

510511
#[test]
511512
fn test_default_datetime() {
512-
assert_eq!(crate::DateTime::make_zero().timestamp(), 0i64);
513+
assert_eq!(0, DateTime::make_zero().to_unix_timestamp());
513514
}
514515

515516
#[test]
@@ -526,16 +527,16 @@ mod tests {
526527
let mut index_writer = index.writer_for_tests()?;
527528
index_writer.set_merge_policy(Box::new(NoMergePolicy));
528529
index_writer.add_document(doc!(
529-
date_field => crate::DateTime::from_u64(1i64.to_u64()),
530-
multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
531-
multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
530+
date_field => DateTime::from_u64(1i64.to_u64()),
531+
multi_date_field => DateTime::from_u64(2i64.to_u64()),
532+
multi_date_field => DateTime::from_u64(3i64.to_u64())
532533
))?;
533534
index_writer.add_document(doc!(
534-
date_field => crate::DateTime::from_u64(4i64.to_u64())
535+
date_field => DateTime::from_u64(4i64.to_u64())
535536
))?;
536537
index_writer.add_document(doc!(
537-
multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
538-
multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
538+
multi_date_field => DateTime::from_u64(5i64.to_u64()),
539+
multi_date_field => DateTime::from_u64(6i64.to_u64())
539540
))?;
540541
index_writer.commit()?;
541542
let reader = index.reader()?;
@@ -547,23 +548,23 @@ mod tests {
547548
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
548549
let mut dates = vec![];
549550
{
550-
assert_eq!(date_fast_field.get(0u32).timestamp(), 1i64);
551+
assert_eq!(date_fast_field.get(0u32).to_unix_timestamp(), 1i64);
551552
dates_fast_field.get_vals(0u32, &mut dates);
552553
assert_eq!(dates.len(), 2);
553-
assert_eq!(dates[0].timestamp(), 2i64);
554-
assert_eq!(dates[1].timestamp(), 3i64);
554+
assert_eq!(dates[0].to_unix_timestamp(), 2i64);
555+
assert_eq!(dates[1].to_unix_timestamp(), 3i64);
555556
}
556557
{
557-
assert_eq!(date_fast_field.get(1u32).timestamp(), 4i64);
558+
assert_eq!(date_fast_field.get(1u32).to_unix_timestamp(), 4i64);
558559
dates_fast_field.get_vals(1u32, &mut dates);
559560
assert!(dates.is_empty());
560561
}
561562
{
562-
assert_eq!(date_fast_field.get(2u32).timestamp(), 0i64);
563+
assert_eq!(date_fast_field.get(2u32).to_unix_timestamp(), 0i64);
563564
dates_fast_field.get_vals(2u32, &mut dates);
564565
assert_eq!(dates.len(), 2);
565-
assert_eq!(dates[0].timestamp(), 5i64);
566-
assert_eq!(dates[1].timestamp(), 6i64);
566+
assert_eq!(dates[0].to_unix_timestamp(), 5i64);
567+
assert_eq!(dates[1].to_unix_timestamp(), 6i64);
567568
}
568569
Ok(())
569570
}

0 commit comments

Comments
 (0)