Home > database >  find average based on unique field and then sum it up - ElasticSearch
find average based on unique field and then sum it up - ElasticSearch

Time:01-25

I have written the query to find average of one field based on another field.

My query to find average looks like this:

{
    "aggs": {
        "unique_Vac_id": {
            "terms": {
                "field": "Vac_id"
            },
            "aggs": {
                "f_in_review": {
                    "avg": {
                        "field": "f_in_review"
                    }
                },
                "f_shortlisted": {
                    "avg": {
                        "field": "f_shortlisted"
                    }
                }
            }
        }
    }
}

Some example data here:

{"_source": {"Vac_id": 185,"f_in_review": 1,"f_shortlisted": 1}},
{"_source": {"Vac_id": 100,"f_in_review": 1,"f_shortlisted": 0}},
{"_source": {"Vac_id": 185,"f_in_review": 0,"f_shortlisted": 1}}...

Above query give me result like this:

"aggregations": {
    "unique_Vac_id": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 7356,
        "buckets": [
            {
                "key": 185,
                "doc_count": 44,
                "f_shortlisted": {"value": 0.16666666666666666},
                "f_in_review": {"value": 0.2777777777777778},
            },
            {
                "key": 100,
                "doc_count": 39,
                "f_shortlisted": {"value": 0.125},
                "f_in_review": {"value": 0.1875},
            },
        ]
    }
}

Now I want to sum f_shortlisted and f_in_review. How can I achieve that? Thankyou

CodePudding user response:

The sum_bucket pipeline aggregation is the one to use

{
    "aggs": {
        "unique_Vac_id": {
            "terms": {
                "field": "Vac_id"
            },
            "aggs": {
                "f_in_review": {
                    "avg": {
                        "field": "f_in_review"
                    }
                },
                "f_shortlisted": {
                    "avg": {
                        "field": "f_shortlisted"
                    }
                }
            }
        },
        "sum_in_review": {
            "sum_bucket": {
                "buckets_path": "unique_Vac_id>f_in_review" 
            }
        },
        "sum_shortlisted": {
            "sum_bucket": {
                "buckets_path": "unique_Vac_id>f_shortlisted" 
            }
        }
    }
}

CodePudding user response:

If you want to sum f_shortlisted and f_in_review for each bucket, you can use pipeline aggregation and bucket script. This is an example query:

{
  "size": 0,
  "aggs": {
    "unique_Vac_id": {
      "terms": {
        "field": "Vac_id"
      },
      "aggs": {
        "f_in_review": {
          "avg": {
            "field": "f_in_review"
          }
        },
        "f_shortlisted": {
          "avg": {
            "field": "f_shortlisted"
          }
        },
        "sum": {
          "bucket_script": {
            "buckets_path": {
              "f_in_review": "f_in_review",
              "f_shortlisted": "f_shortlisted"
            },
            "script": "params.f_in_review   params.f_shortlisted"
          }
        }
      }
    }
  }
}
  •  Tags:  
  • Related