如何在elasticsearch中聚合文本类型列的总和?

ql3eal8s  于 2021-06-15  发布在  ElasticSearch
关注(0)|答案(1)|浏览(357)

我一直在尝试将聚合函数应用于例如:sum on type of text我的索引Map如下:

{
    "my_elastic_search_index" : {
        "mappings" : {
        "properties" : {
            "@timestamp" : {
            "type" : "date"
            },
            "@version" : {
            "type" : "text",
            "fields" : {
                "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
                }
            }
            },
            "doc" : {
            "properties" : {
                "_id" : {
                "type" : "text",
                "fields" : {
                    "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                    }
                }
                },
                "last_updated_on" : {
                "type" : "long"
                },
                "sample_ids" : {
                "type" : "nested",
                "properties" : {
                    "name" : {
                    "type" : "text",
                    "fields" : {
                        "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                        }
                    }
                    },
                    "value" : {
                    "type" : "text",
                    "fields" : {
                        "keyword" : {
                        "type" : "keyword",
                        "ignore_above" : 256
                        }
                    }
                    }
                }
                },
                "status" : {
                "type" : "text",
                "fields" : {
                    "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                    }
                }
                },
                "filter_id" : {
                "type" : "text",
                "fields" : {
                    "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                    }
                }
                }
            }
            },
            "query" : {
            "properties" : {
                "match" : {
                "properties" : {
                    "doc" : {
                    "properties" : {
                        "filter_id" : {
                        "type" : "text",
                        "fields" : {
                            "keyword" : {
                            "type" : "keyword",
                            "ignore_above" : 256
                            }
                        }
                        }
                    }
                    }
                }
                }
            }
            }
        }
        }
    }
    }

我的数据是:

{
    "took" : 23,
    "timed_out" : false,
    "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
    },
    "hits" : {
        "total" : {
        "value" : 4,
        "relation" : "eq"
        },
        "max_score" : 1.0,
        "hits" : [
        {
            "_index" : "my_elastic_search_index",
            "_type" : "_doc",
            "_id" : "zzz-yyy-xxx-a9e8",
            "_score" : 1.0,
            "_source" : {
            "@version" : "1",
            "@timestamp" : "2019-11-14T14:30:56.261Z",
            "doc" : {
                "status" : "SENT",
                "sample_ids" : [
                {
                    "value" : """"20"""",
                    "name" : "8f4abde123d"
                },
                {
                    "value" : """"25.52"""",
                    "name" : "d92c4732bc9fb91"
                },
                {
                    "value" : """"0"""",
                    "name" : "4b91bdee68b6e"
                },
                {
                    "value" : """"xyz"""",
                    "name" : "bd0a944a292d5a"
                },
                {
                    "value" : """"someothervlue"""",
                    "name" : "8ee9932060d5bf"
                },
                {
                    "value" : """"30..01"""",
                    "name" : "229eed093fa0d85"
                },
                ],
                "filter_id" : "a1357913-cf99650f51d",
                "_id" : "zzz-yyy-xxx-a9e81",
            }
            }
        },
        {
            "_index" : "my_elastic_search_index",
            "_type" : "_doc",
            "_id" : "zzz-yyy-xxx-a9e82",
            "_score" : 1.0,
            "_source" : {
            "@version" : "1",
            "@timestamp" : "2019-11-14T14:30:56.731Z",
            "doc" : {
                "status" : "SENT",
                "sample_ids" : [
                {
                    "value" : """"40"""",
                    "name" : "8f4abde123d"
                },
                {
                    "value" : """"0"""",
                    "name" : "d92c47372bc9fb91"
                },
                {
                    "value" : """"0"""",
                    "name" : "4b91bdc6ee68b6e"
                },
                {
                    "value" : """"0"""",
                    "name" : "bccf07c19cfe12c"
                }
                ],
                "filter_id" : "a1357913-cf99650f51d",
                "_id" : "zzz-yyy-xxx-a9e84",
            },
            }
        },
        {
            "_index" : "my_elastic_search_index",
            "_type" : "_doc",
            "_id" : "zzz-yyy-xxx-a9e85",
            "_score" : 1.0,
            "_source" : {
            "@version" : "1",
            "@timestamp" : "2019-11-14T08:23:36.998Z",
            "doc" : {
                "status" : "SENT",
                "sample_ids" : [
                {
                    "value" : """"17.8"""",
                    "name" : "8f4abde123d"
                },
                {
                    "value" : """"35.6"""",
                    "name" : "d92c473132bc9fb91"
                },
                {
                    "value" : """"0"""",
                    "name" : "4b91bd5c6ee68b6e"
                },
                {
                    "value" : """"35.6"""",
                    "name" : "bd0a944c2a292d5a"
                },
                {
                    "value" : """"0"""",
                    "name" : "8ee9934dce9e2060d5bf"
                },
                {
                    "value" : """"0"""",
                    "name" : "229eed48xsscd3fa0d85"
                },
                {
                    "value" : """"30"""",
                    "name" : "4381f1bddffc4265129"
                },
                {
                    "value" : """"0"""",
                    "name" : "94cafdd1c78fc355b00"
                },
                {
                    "value" : """"HVDC"""",
                    "name" : "bccf024ac19cfe12c"
                }
                ],
                "filter_id" : "a1357913-cf99650f51d",
                "_id" : "zzz-yyy-xxx-a9e84",
                }
            }
        },
        {
            "_index" : "my_elastic_search_index",
            "_type" : "_doc",
            "_id" : "zzz-yyy-xxx-a9e87",
            "_score" : 1.0,
            "_source" : {
            "@version" : "1",
            "@timestamp" : "2019-11-14T08:24:01.272Z",
            "doc" : {
                "sample_ids" : [
                {
                    "value" : """"11.08"""",
                    "name" : "8f4abde123d"
                },
                {
                    "value" : """"35.6"""",
                    "name" : "d92c4737a132bc9fb91"
                },
                {
                    "value" : """"0"""",
                    "name" : "4b91bd5028c6ee68b6e"
                },
                {
                    "value" : """"35.6"""",
                    "name" : "bd0a9445e19c2a292d5a"
                },
                {
                    "value" : """"0"""",
                    "name" : "8ee9934dd002060d5bf"
                },
                {
                    "value" : """"0"""",
                    "name" : "229eed48e2093fa0d85"
                }
                ],
                "filter_id" : "a1357913-cf99650f51d",
                }
            }
        }
        ]
    }
    }

我想返回doc.sample\u ids.value的和的结果为88.88,其中doc.sample\u ids.name=8f4abde123d和filter\u id=a1357913-cf99650f51d,已经尝试将doc.sample\u ids.name转换为number,这给了我错误。有没有我能得到的总和,平均值和计数。

GET /my_elastic_search_index/_search
    {
        "query": {
        "bool": {
        "must": [
            {
            "terms": {
                "doc.filter_id.keyword": [
                "a1357913-cf99650f51d"
                ]
            }
            }
        ]
        }
    },
        "aggs": { 
        "sum_values" : { "sum" : { "script" : { 
        "lang":"painless", "inline" : "Double.parseDouble(doc['sample_ids.value'])" } } } }
        }
nkoocmlb

nkoocmlb1#

使用嵌套字段,因此必须使用嵌套聚合:
尝试这样的聚合:

{
  "aggs": {
    "resellers": {
      "nested": {
        "path": "doc.sample_ids"
      },
      "aggs": {
        "sum_values": {
          "sum": {
            "script": {
              "lang": "painless",
              "source": "Float.parseFloat(doc['doc.sample_ids.value.keyword'].value)"
            }
          }
        }
      }
    }
  }
}

对于这样的数据:“35.6”,首先,您应该清理数据。这不是一个有效的格式,它不应该这样存储。您可以通过重新索引数据或使用loigstash或使用updateby查询来完成。
但是如果你真的不能解决这个问题,你必须在elasticsearch.yml中启用regex功能。
添加此行:

script.painless.regex.enabled: true

然后你就可以这样做:

"aggs": {
    "sum_values": {
      "sum": {
        "script": {
          "lang": "painless",
          "source": """
          String content = /[\"]/.matcher(doc['doc.sample_ids.value.keyword'].value).replaceAll('');
          Double.parseDouble(content);
          """
        }
      }
    }
  }

小心这个查询,性能会很差。你真的应该解决你的摄取问题,并清理你的数据。

相关问题