如何对Lucene中long类型日期范围查询的结果按日期排序

wmvff8tz  于 2022-11-07  发布在  Lucene
关注(0)|答案(1)|浏览(130)

问题:我想按日期范围搜索图书,并对结果按日期排序。按日期范围搜索是有效的,但文档的排序不正确(看起来是按插入顺序排列的,请参阅ID):

========== uploadDate:[-2208988800 TO 1893456000] ==========
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]
FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z]
FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z]
FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z]
FOUND: Book[id=5, fileName=The bible.pdf, uploadDate=2020-01-01T09:00:00Z]
FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]

为了按日期对它们进行排序,我将代码改为:
添加NumericDocValuesField

document.add(new StoredField("uploadDate", book.uploadDate().getEpochSecond()));
document.add(new LongPoint("uploadDate", book.uploadDate().getEpochSecond()));
document.add(new NumericDocValuesField("uploadDate", book.uploadDate().getEpochSecond()));

添加一个Sort

// Build the sorter to sort the documents by date
Sort sorter = new Sort();
SortField sortField = new SortField("uploadDate", SortField.Type.LONG, true);
sorter.setSort(sortField, SortField.FIELD_SCORE);

Query query = parser.parse(queryText, "");
TopDocs hits = indexSearcher.search(query, 100, sorter);

问题:我做错了什么?我需要做什么更改才能使文档按降序排序(2021第一,2020第二,2000第三...)?下面这个问题对我没有帮助:Sorting lucene documents by date

我的代码:

import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.text.DecimalFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.*;

/**
 * Small in-memory Lucene demo: indexes a handful of books and runs range
 * queries on their upload date, returning hits sorted newest-first.
 *
 * <p>The upload date is indexed as epoch seconds under three field types:
 * {@code StoredField} (to read the value back), {@code LongPoint} (for range
 * queries) and {@code NumericDocValuesField} (for sorting).
 */
public class LuceneDocumentExample {

    /** A book as stored in and read back from the index. */
    public record Book(
        Integer id,
        String fileName,
        Instant uploadDate
    ) {
    }

    private static Directory directory;

    private static IndexWriter indexWriter;

    /**
     * Builds the index, runs the sample queries and closes the index again.
     *
     * @param arguments unused
     * @throws Exception if indexing or closing the index fails
     */
    public static void main(String[] arguments) throws Exception {
        // Create the index. The writer must use the same directory that is kept
        // in the static field; otherwise that field refers to an unused, empty index.
        directory = new RAMDirectory();
        indexWriter = new IndexWriter(directory, new IndexWriterConfig(new MultiFieldAnalyzer()));

        try {
            // Define some sample books
            List<Book> books = Arrays.asList(
                new Book(1, "Book1 - The beginning.pdf", createInstant(2000, 1, 1, 10, 0)),
                new Book(2, "Start Wars 1.pdf", createInstant(1977, 5, 25, 10, 0)),
                new Book(3, "Start Wars 2.pdf", createInstant(1980, 5, 21, 10, 0)),
                new Book(4, "Start Wars 3.pdf", createInstant(1983, 5, 25, 10, 0)),
                new Book(5, "The bible.pdf", createInstant(2020, 1, 1, 10, 0)),
                new Book(6, "TheUltimateDeveloperGuide.pdf", createInstant(2021, 2, 16, 20, 0))
            );

            // Add the books
            for (Book book : books) {
                long uploadEpochSecond = book.uploadDate().getEpochSecond();

                Document document = new Document();
                document.add(new StringField("id", book.id().toString(), Field.Store.YES));
                document.add(new StringField("fileName", book.fileName(), Field.Store.YES));
                document.add(new StoredField("uploadDate", uploadEpochSecond));
                document.add(new LongPoint("uploadDate", uploadEpochSecond));
                document.add(new NumericDocValuesField("uploadDate", uploadEpochSecond));

                indexWriter.addDocument(document);
            }
            // One commit for the whole batch instead of one per document.
            indexWriter.commit();

            // Run several test queries to check the functionality
            checkSearchResult("id:1", Set.of(1));
            checkSearchResult("id:6", Set.of(6));
            checkSearchResult("id:1 OR id:2", Set.of(1, 2));
            checkSearchResult("id:1 AND id:2", Set.of());
            checkSearchResult("fileName:TheUltimateDeveloperGuide.pdf", Set.of(6));
            checkSearchResult("id:6 fileName:TheUltimateDeveloperGuide.pdf", Set.of(6));
            checkSearchResult("uploadDate:[" + createInstant(2000, 1, 1, 8, 0).getEpochSecond() + " TO " + createInstant(2000, 1, 1, 12, 0).getEpochSecond() + "]", Set.of(1));
            checkSearchResult("uploadDate:[" + createInstant(1977, 1, 1, 1, 0).getEpochSecond() + " TO " + createInstant(1983, 5, 26, 1, 0).getEpochSecond() + "]", Set.of(2, 3, 4));
            checkSearchResult("uploadDate:[" + createInstant(1900, 1, 1, 1, 0).getEpochSecond() + " TO " + createInstant(2030, 1, 1, 1, 0).getEpochSecond() + "]", Set.of(1, 2, 3, 4, 5, 6));
            checkSearchResult("id:1 uploadDate:[" + createInstant(2000, 1, 1, 8, 0).getEpochSecond() + " TO " + createInstant(2000, 1, 1, 12, 0).getEpochSecond() + "]", Set.of(1));
        } finally {
            // Close the index even when one of the checks throws
            indexWriter.close();
            directory.close();
        }
    }

    /**
     * Runs the query and returns the matching books, newest upload date first.
     *
     * @param queryText query in the standard query parser syntax
     * @return the matching books (at most 100) in sorted hit order
     * @throws RuntimeException if the query cannot be parsed or executed
     */
    public static List<Book> searchDocuments(String queryText) {
        // Create the reader
        try (IndexReader indexReader = DirectoryReader.open(indexWriter)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            StandardQueryParser parser = new StandardQueryParser();
            parser.setAnalyzer(indexWriter.getAnalyzer());

            // Make the parser treat uploadDate ranges as numeric (long) point ranges
            Map<String, PointsConfig> pointsConfigMap = new HashMap<>();
            pointsConfigMap.put("uploadDate", new PointsConfig(new DecimalFormat(), Long.class));
            parser.setPointsConfigMap(pointsConfigMap);

            // Sort by upload date, reversed (descending); the score is the secondary criterion
            SortField sortField = new SortField("uploadDate", SortField.Type.LONG, true);
            Sort sorter = new Sort(sortField, SortField.FIELD_SCORE);

            Query query = parser.parse(queryText, "");
            TopDocs hits = indexSearcher.search(query, 100, sorter);

            List<Book> books = new ArrayList<>(hits.scoreDocs.length);
            for (ScoreDoc hit : hits.scoreDocs) {
                Document document = indexSearcher.doc(hit.doc);

                Integer id = Integer.parseInt(document.get("id"));
                String fileName = document.get("fileName");
                Instant uploadDate = Instant.ofEpochSecond(Long.parseLong(document.get("uploadDate")));

                books.add(new Book(id, fileName, uploadDate));
            }
            return books;
        } catch (Exception exception) {
            throw new RuntimeException("Unable to execute query " + queryText + ": " + exception.getMessage(), exception);
        }
    }

    /**
     * Runs the query and prints the hits in the order the search returned them.
     *
     * <p>Iterating over {@code expectedIds} and looking each ID up in the result
     * would print the books in the set's iteration order and hide the sort order
     * of the hits, so the result list drives the output instead.
     *
     * @param queryText   query in the standard query parser syntax
     * @param expectedIds IDs that must be part of the result; order is not checked
     */
    public static void checkSearchResult(String queryText, Set<Integer> expectedIds) {
        System.out.println("========== " + queryText + " ==========");
        List<Book> books = searchDocuments(queryText);

        if (expectedIds.isEmpty() && books.isEmpty()) {
            System.out.println("NONE");
            return;
        }

        // IDs still waiting to be seen in the result; sorted for a stable MISSING output
        Set<Integer> missingIds = new TreeSet<>(expectedIds);
        for (Book book : books) {
            if (missingIds.remove(book.id())) {
                System.out.println("FOUND: " + book);
            } else {
                System.out.println("UNEXPECTED: " + book);
            }
        }
        for (Integer missingId : missingIds) {
            System.out.println("MISSING: " + missingId);
        }
    }

    /**
     * Converts a local date and time in the Europe/Zurich time zone to an instant.
     */
    private static Instant createInstant(Integer year, Integer month, Integer day, Integer hour, Integer minute) {
        LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute);
        return dateTime.atZone(ZoneId.of("Europe/Zurich")).toInstant();
    }
}
j2qf4p5b

j2qf4p5b1#

我的问题与使用Lucene DateTools把日期转换为long值有关。我更改了代码,现在得到了正确的排序:

import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.text.DecimalFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.*;

/**
 * Small in-memory Lucene demo: indexes a handful of books and checks that
 * queries return them sorted by upload date, newest first.
 *
 * <p>The upload date is converted with {@code DateTools} at minute resolution
 * and indexed as a {@code long} (see {@link #toLuceneDate(Instant)}).
 */
public class SearchDemo {

    /** A book as stored in and read back from the index. */
    record Book(Integer id, String fileName, Instant uploadDate) {
    }

    private static Directory directory;

    private static IndexWriter indexWriter;

    /**
     * Builds the index, runs the sample queries and closes the index again.
     *
     * @param arguments unused
     */
    public static void main(String[] arguments) {
        // try-with-resources closes the writer first, then the directory,
        // also when indexing or one of the checks throws.
        try (Directory openedDirectory = new RAMDirectory();
             IndexWriter openedWriter = new IndexWriter(openedDirectory, new IndexWriterConfig(new MultiFieldAnalyzer()))) {
            directory = openedDirectory;
            indexWriter = openedWriter;

            // Define a few books
            List<Book> books = List.of(
                    new Book(1, "Book1 - The beginning.pdf", createDateString(2000, 1, 1, 10, 0)),
                    new Book(2, "Start Wars 1.pdf", createDateString(1977, 5, 25, 10, 0)),
                    new Book(3, "Start Wars 2.pdf", createDateString(1980, 5, 21, 10, 0)),
                    new Book(4, "Start Wars 3.pdf", createDateString(1983, 5, 25, 10, 0)),
                    new Book(5, "The bible.pdf", createDateString(2020, 1, 1, 10, 0)),
                    new Book(6, "TheUltimateDeveloperGuide.pdf", createDateString(2021, 2, 16, 20, 0))
            );

            // Add the books
            for (Book book : books) {
                addBook(book);
            }

            // Run the checks; the expected IDs are listed in the expected hit order
            checkSearchResult("id:1", List.of(1));
            checkSearchResult("id:6", List.of(6));
            checkSearchResult("id:1 OR id:2", List.of(1, 2));
            checkSearchResult("id:1 AND id:2", List.of());
            checkSearchResult("fileName:TheUltimateDeveloperGuide.pdf", List.of(6));
            checkSearchResult("id:6 AND fileName:TheUltimateDeveloperGuide.pdf", List.of(6));
            checkSearchResult("uploadDate:[" + toLuceneDate(createDateString(2000, 1, 1, 8, 0)) + " TO " + toLuceneDate(createDateString(2000, 1, 1, 12, 0)) + "]", List.of(1));
            checkSearchResult("uploadDate:[" + toLuceneDate(createDateString(1977, 1, 1, 1, 0)) + " TO " + toLuceneDate(createDateString(1983, 5, 26, 1, 0)) + "]", List.of(4, 3, 2));
            checkSearchResult("uploadDate:[" + toLuceneDate(createDateString(1930, 1, 1, 1, 1)) + " TO " + toLuceneDate(createDateString(2030, 1, 1, 1, 1)) + "]", List.of(6, 5, 1, 4, 3, 2));
            checkSearchResult("id:1 uploadDate:[" + toLuceneDate(createDateString(2000, 1, 1, 8, 0)) + " TO " + toLuceneDate(createDateString(2000, 1, 1, 12, 0)) + "]", List.of(1));
        } catch (Exception exception) {
            exception.printStackTrace();
        }
    }

    /**
     * Adds one book to the index and commits.
     *
     * @param book the book to index
     * @throws Exception if the document cannot be written
     */
    private static void addBook(Book book) throws Exception {
        Long uploadDate = toLuceneDate(book.uploadDate());

        Document document = new Document();
        document.add(new StringField("id", Integer.toString(book.id()), Field.Store.YES));
        document.add(new StringField("fileName", book.fileName(), Field.Store.YES));
        // LongPoint serves range queries, NumericDocValuesField serves sorting.
        document.add(new LongPoint("uploadDate", uploadDate));
        // Stored as a string so searchBooks can parse it back with DateTools.stringToDate.
        document.add(new StoredField("uploadDate", uploadDate.toString()));
        document.add(new NumericDocValuesField("uploadDate", uploadDate));
        indexWriter.addDocument(document);
        indexWriter.commit();
    }

    /**
     * Runs the query and returns the matching books, newest upload date first.
     *
     * @param searchQuery query in the standard query parser syntax
     * @return the matching books (at most 100) in sorted hit order
     * @throws Exception if the query cannot be parsed or executed
     */
    private static List<Book> searchBooks(String searchQuery) throws Exception {
        // The reader is closed once all hits have been converted to books
        try (IndexReader indexReader = DirectoryReader.open(indexWriter)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            StandardQueryParser parser = new StandardQueryParser();
            parser.setAnalyzer(indexWriter.getAnalyzer());

            // Make the parser treat uploadDate ranges as numeric (long) point ranges
            PointsConfig pointsConfig = new PointsConfig(new DecimalFormat(), Long.class);
            Map<String, PointsConfig> pointsConfigMap = new HashMap<>();
            pointsConfigMap.put("uploadDate", pointsConfig);
            parser.setPointsConfigMap(pointsConfigMap);

            // uploadDate is indexed as a single-valued NumericDocValuesField, so a
            // plain SortField is the matching sort; reverse = true gives newest first.
            SortField valueSort = new SortField("uploadDate", SortField.Type.LONG, true);
            Sort sorter = new Sort(valueSort);

            Query query = parser.parse(searchQuery, "");
            TopDocs hits = indexSearcher.search(query, 100, sorter);

            List<Book> books = new ArrayList<>(hits.scoreDocs.length);
            for (ScoreDoc hit : hits.scoreDocs) {
                Document document = indexSearcher.doc(hit.doc);
                Integer id = Integer.parseInt(document.get("id"));
                String fileName = document.get("fileName");
                Instant uploadDate = DateTools.stringToDate(document.get("uploadDate")).toInstant();
                books.add(new Book(id, fileName, uploadDate));
            }
            return books;
        }
    }

    /**
     * Converts a local date and time in the Europe/Zurich time zone to an instant.
     * Despite the name, the result is an {@link Instant}, not a string.
     */
    private static Instant createDateString(Integer year, Integer month, Integer day, Integer hour, Integer minute) {
        LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute);
        return dateTime.atZone(ZoneId.of("Europe/Zurich")).toInstant();
    }

    /**
     * Formats the instant with {@code DateTools} at minute resolution and parses
     * the resulting digit string as a long (e.g. {@code 200001010900}).
     */
    private static Long toLuceneDate(Instant instant) {
        return Long.parseLong(DateTools.dateToString(Date.from(instant), DateTools.Resolution.MINUTE));
    }

    /**
     * Runs the query and compares the hits, position by position, with the
     * expected IDs, printing one line per hit.
     *
     * @param queryText   query in the standard query parser syntax
     * @param expectedIds expected book IDs in the expected hit order
     * @throws Exception if the search fails
     */
    public static void checkSearchResult(String queryText, List<Integer> expectedIds) throws Exception {
        System.out.println("========== " + queryText + " ==========");
        List<Book> books = searchBooks(queryText);

        // Compare sizes first so that an unexpected hit is also reported
        // when no result at all was expected.
        if (expectedIds.size() != books.size()) {
            System.out.println("MISMATCH. GOT: " + books);
            return;
        }
        if (expectedIds.isEmpty()) {
            System.out.println("EMPTY AS EXPECTED");
            return;
        }

        for (int i = 0; i < expectedIds.size(); i++) {
            Integer expectedId = expectedIds.get(i);
            Book book = books.get(i);
            if (expectedId.equals(book.id())) {
                System.out.println("FOUND: " + book);
            } else {
                // Report the ID that is missing at this position, plus what was found instead
                System.out.println("MISSING: " + expectedId + " (got " + book.id() + " at position " + i + ")");
            }
        }
    }
}

输出:

========== id:1 ==========
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]
========== id:6 ==========
FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]
========== id:1 OR id:2 ==========
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]
FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z]
========== id:1 AND id:2 ==========
EMPTY AS EXPECTED
========== fileName:TheUltimateDeveloperGuide.pdf ==========
FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]
========== id:6 AND fileName:TheUltimateDeveloperGuide.pdf ==========
FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]
========== uploadDate:[200001010700 TO 200001011100] ==========
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]
========== uploadDate:[197701010000 TO 198305252300] ==========
FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z]
FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z]
FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z]
========== uploadDate:[193001010001 TO 203001010001] ==========
FOUND: Book[id=6, fileName=TheUltimateDeveloperGuide.pdf, uploadDate=2021-02-16T19:00:00Z]
FOUND: Book[id=5, fileName=The bible.pdf, uploadDate=2020-01-01T09:00:00Z]
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]
FOUND: Book[id=4, fileName=Start Wars 3.pdf, uploadDate=1983-05-25T08:00:00Z]
FOUND: Book[id=3, fileName=Start Wars 2.pdf, uploadDate=1980-05-21T09:00:00Z]
FOUND: Book[id=2, fileName=Start Wars 1.pdf, uploadDate=1977-05-25T09:00:00Z]
========== id:1 uploadDate:[200001010700 TO 200001011100] ==========
FOUND: Book[id=1, fileName=Book1 - The beginning.pdf, uploadDate=2000-01-01T09:00:00Z]

相关问题