ลองใช้ Tantivy ซึ่งเอาไว้ทำ Search Engine เขียนด้วย Rust

October 22, 2019

ผมทำตามตัวอย่างที่ https://tantivy-search.github.io/examples/basic_search.html แต่โมพวก field แล้วก็แก้ให้เป็นสองไฟล์แบบที่อยากใช้งาน

Cargo.toml

[package]
name = "v_search"
version = "0.1.0"
authors = ["Vee Satayamas <vee.sa@protonmail.com>"]
edition = "2018"

[dependencies]
tantivy = "0.10.2"

[[bin]]
name = "make_index"
path = "src/make_index.rs"

[[bin]]
name = "search"
path = "src/search.rs"

make_index.rs

ส่วนที่ผมงงแรก ๆ เลยคือ STORED อ่านมาบอกว่าสามารถอ่านข้อมูลคืนมาด้วย id เข้าใจว่าถ้า field ไหนไม่ STORED คือทำ index อย่างเดียวจริง ๆ แต่ข้อมูลต้นฉบับหาย แต่ผมก็ยังไม่ลองว่าถ้าไม่ใส่มันจะไม่ได้เชียวหรือ ?

use tantivy::doc;
use tantivy::schema::*;
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let index_path = "idx"; 
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("textunit", TEXT | STORED);
    let schema = schema_builder.build();
    let textunit = schema.get_field("textunit").unwrap();
    let index = Index::create_in_dir(&index_path, schema.clone())?;
    let mut index_writer = index.writer(50_000_000)?;

    index_writer.add_document(doc!(
        textunit => "A",
    ));

    index_writer.commit()?;
    Ok(())
}

search.rs

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::ReloadPolicy;

fn main() -> tantivy::Result<()> {
    let index_path = "idx"; 
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("textunit", TEXT | STORED);
    let schema = schema_builder.build();
    let textunit = schema.get_field("textunit").unwrap();
    let index = Index::open_in_dir(index_path)?;
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommit)
        .try_into()?;
    let searcher = reader.searcher();

    let query_parser = QueryParser::for_index(&index, vec![textunit]);
    let query = query_parser.parse_query("A")?;
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    for (_score, doc_address) in top_docs {
        let retrieved_doc = searcher.doc(doc_address)?;
        println!("{}", schema.to_json(&retrieved_doc));
    }
    Ok(())
}

อันนี้ผมก็คือทำ index ไว้ใน directory ชื่อ idx แล้ว แล้วอีกไฟล์ก็มา search แค่นี้เลย