我有下面的查询,它匹配2个使用公共字段的集合,但有一个问题,比较不是不敏感的,因此我没有得到完全匹配。然后我试图使它不敏感。
下面是我使用的查询:
cursor= db.csv_import.aggregate([
{
$lookup: {
from: 'EN',
let: {pn:'$part_no',vendor:'$vendor_standard'},
pipeline: [{
$match: {
$expr: {
$and: [{$eq: ["$$pn","$ICECAT-interface.Product.@Prod_id"]}],{$eq: ["$$vendor","$ICECAT-interface.Product.Supplier.@Name"]}]
}
}
}],
as: 'part_number_info'
}
}, { $match: {"part_number_info.0": {$exists: true}}
}, { $project: {"part_no": 1,"part_number_info.ICECAT-interface.Product.@ID": 1, "part_number_info.ICECAT-interface.Product.Supplier.@Name": 1, "_id":0}}
]).pretty();
我在这里读到过关于麻木不仁的文章:https://www.mongodb.com/docs/manual/core/index-case-insensitive/
因此,我在2个集合上为供应商和产品部件号创建了2个索引,如下所示(部件号示例)
db.csv_import.createIndex(
{'part_no': 1},{name: "part_no_unsensitive_idx", collation: {locale: "en",strength:2})
db.EN.createIndex(
{'ICECAT-interface.Product.@Prod_id': 1},{name: "product_unsensitive_idx", collation: {locale: "en",strength:2})
我试了这个
cursor= db.csv_import.aggregate([
{
$lookup: {
from: 'EN',
let: {pn:'$part_no',vendor:'$vendor_standard'},
pipeline: [{
$match: {
$expr: {
$and: [{$eq: ["$$pn","$ICECAT-interface.Product.@Prod_id"]},{$eq: ["$$vendor","$ICECAT-interface.Product.Supplier.@Name"]}]
}
}
}],
as: 'part_number_info'
}
}, { $match: {"part_number_info.0": {$exists: true}}
}, { $project: {"part_no": 1,"part_number_info.ICECAT-interface.Product.@ID": 1, "part_number_info.ICECAT-interface.Product.Supplier.@Name": 1, "_id":0}}
]).collation( { locale: 'en', strength: 2 }).pretty();
然而,执行时间从毫秒变成了很长的分钟(超过10分钟)。显然它没有使用任何索引,是我创建索引的方式有问题还是我在这里遗漏了其他东西?
编辑
我按照建议将其更改为(在管道之后进行排序),虽然它有所改进(花费了一半的时间),但仍然需要一个小时,而不使用排序只需要几秒钟:
use Icecat
db.csv_import.aggregate([
{
$lookup: {
from: 'EN',
let: {pn:'$part_no',vendor:'$vendor_standard'},
pipeline: [{
$match: {
$expr: {
$and: [{$eq: ["$$pn","$ICECAT-interface.Product.@Prod_id"]},{$eq: ["$$vendor","$ICECAT-interface.Product.Supplier.@Name"]}]
}
}
}],
as: 'part_number_info'
}
}, { $match: {"part_number_info.0": {$exists: true}}
}, { $project: {"part_no": 1,"part_number_info.ICECAT-interface.Product.@ID": 1, "part_number_info.ICECAT-interface.Product.Supplier.@Name": 1, "_id":0}
}
],
{collation: { locale: 'en', strength: 2 }
).pretty();
编辑2
要解释地输出:
{
"explainVersion": "1",
"stages": [
{
"$cursor": {
"queryPlanner": {
"namespace": "Icecat.csv_import",
"indexFilterSet": false,
"parsedQuery": {},
"queryHash": "005DB16E",
"planCacheKey": "E1018696",
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "PROJECTION_SIMPLE",
"transformBy": {
"part_no": 1,
"part_number_info": 1,
"vendor_standard": 1,
"_id": 0
},
"inputStage": {
"stage": "COLLSCAN",
"direction": "forward"
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 23685,
"executionTimeMillis": 9051,
"totalKeysExamined": 0,
"totalDocsExamined": 23685,
"executionStages": {
"stage": "PROJECTION_SIMPLE",
"nReturned": 23685,
"executionTimeMillisEstimate": 18,
"works": 23687,
"advanced": 23685,
"needTime": 1,
"needYield": 0,
"saveState": 25,
"restoreState": 25,
"isEOF": 1,
"transformBy": {
"part_no": 1,
"part_number_info": 1,
"vendor_standard": 1,
"_id": 0
},
"inputStage": {
"stage": "COLLSCAN",
"nReturned": 23685,
"executionTimeMillisEstimate": 8,
"works": 23687,
"advanced": 23685,
"needTime": 1,
"needYield": 0,
"saveState": 25,
"restoreState": 25,
"isEOF": 1,
"direction": "forward",
"docsExamined": 23685
}
}
}
},
"nReturned": 23685,
"executionTimeMillisEstimate": 65
},
{
"$lookup": {
"from": "EN",
"as": "part_number_info",
"let": {
"pn": "$part_no",
"vendor": "$vendor_standard"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$eq": [
"$$pn",
"$ICECAT-interface.Product.@Prod_id"
]
},
{
"$eq": [
"$$vendor",
"$ICECAT-interface.Product.Supplier.@Name"
]
}
]
}
}
}
]
},
"totalDocsExamined": 2769,
"totalKeysExamined": 2769,
"collectionScans": 0,
"indexesUsed": [
"ICECAT-interface.Product.@Prod_id_1"
],
"nReturned": 23685,
"executionTimeMillisEstimate": 8918
},
{
"$match": {
"part_number_info.0": {
"$exists": true
}
},
"nReturned": 2690,
"executionTimeMillisEstimate": 8919
},
{
"$project": {
"part_no": true,
"part_number_info": {
"ICECAT-interface": {
"Product": {
"@ID": true,
"Supplier": {
"@Name": true
}
}
}
},
"_id": false
},
"nReturned": 2690,
"executionTimeMillisEstimate": 8919
}
],
"serverInfo": {
"host": "ip-10-0-1-199.eu-west-1.compute.internal",
"port": 27017,
"version": "5.0.9-8",
"gitVersion": "15a95b4ea8203b337be88bebbeea864b4dadb6a2"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
},
"command": {
"aggregate": "csv_import",
"pipeline": [
{
"$lookup": {
"from": "EN",
"let": {
"pn": "$part_no",
"vendor": "$vendor_standard"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$eq": [
"$$pn",
"$ICECAT-interface.Product.@Prod_id"
]
},
{
"$eq": [
"$$vendor",
"$ICECAT-interface.Product.Supplier.@Name"
]
}
]
}
}
}
],
"as": "part_number_info"
}
},
{
"$match": {
"part_number_info.0": {
"$exists": true
}
}
},
{
"$project": {
"part_no": 1,
"part_number_info.ICECAT-interface.Product.@ID": 1,
"part_number_info.ICECAT-interface.Product.Supplier.@Name": 1,
"_id": 0
}
}
],
"cursor": {},
"$db": "Icecat"
},
"ok": 1
}
1条答案
按热度按时间yqyhoc1h1#
我觉得这里可能有两个问题。
第一个问题是不清楚您是否以适当的方式请求排序规则。虽然这可能取决于版本,但文档建议的语法为:
第二件事是,有点类似于你之前的一个问题,我认为这里的字段名称是错误的。在你的
$lookup
pipeline
中,EN
集合有:但您在
EN
集合上创建的索引是:请注意,字段名称的结尾是
@Prod_id
与@ID
。其中一个需要更新。根据解释计划(谢谢!)和其他问题进行编辑。
解释计划似乎显示大多数情况都按预期工作。更具体地说,我们可以看到:
1.查询在源
csv_import
集合上使用集合扫描。请注意,这是 * 预期的 ,因为聚合 * 没有 * 对该集合进行任何筛选,因此索引没有任何帮助。1.数据库 is 使用
EN
集合上的索引。stages
数组中的$lookup
项报告"indexesUsed": [ "ICECAT-interface.Product.@Prod_id_1" ]
沿着"totalDocsExamined": 2769
和"totalKeysExamined": 2769
。最后,为了更好的度量,它还报告"collectionScans": 0
。1.执行此操作所需的总时间约为9秒:
"executionTimeMillis": 9051
.那么,为什么我说这是“大部分工作”,这里报告的9秒和您的评论“ 过去需要几秒的东西现在需要大约一个小时(当添加排序规则时)*"之间的区别是什么?
在
explain
输出中没有看到collation
。当我生成一个解释计划(使用前面提到的语法,将collation
指定为.aggregate()
函数调用中的一个选项)时,响应的command
部分如下所示:特别是
collation
是直接表述的,这在你的explain
输出中是没有的,所以我认为:1.我们正在查看的
explain
计划是针对 * 不 * 使用collation
的聚合。这也意味着您的"ICECAT-interface.Product.@Prod_id_1"
索引***没有***应用排序规则。您应该能够通过检查该集合的.getIndexes()
输出来确认这一点。1.当您应用
collation
并尝试执行该操作时,数据库将无法使用索引(由于上一点中提到的不匹配),这无疑可以解释为什么会出现长持续时间。总而言之-
EN
集合上的索引所定义的排序规则与您正在运行的聚合之间似乎仍存在不匹配。请仔细检查并更正这些项。