[elasticsearch] ElasticSearch - Return Unique Values

If you want to get all unique values without any approximation or setting a magic number (size: 500), then use COMPOSITE AGGREGATION (ES 6.5+).

From official documentation:

"If you want to retrieve all terms or all combinations of terms in a nested terms aggregation you should use the COMPOSITE AGGREGATION which allows to paginate over all possible terms rather than setting a size greater than the cardinality of the field in the terms aggregation. The terms aggregation is meant to return the top terms and does not allow pagination."

Implementation example in JavaScript:

_x000D_
_x000D_
const ITEMS_PER_PAGE = 1000;_x000D_
_x000D_
const body =  {_x000D_
    "size": 0, // Returning only aggregation results: https://www.elastic.co/guide/en/elasticsearch/reference/current/returning-only-agg-results.html_x000D_
    "aggs" : {_x000D_
        "langs": {_x000D_
            "composite" : {_x000D_
                "size": ITEMS_PER_PAGE,_x000D_
                "sources" : [_x000D_
                    { "language": { "terms" : { "field": "language" } } }_x000D_
                ]_x000D_
            }_x000D_
        }_x000D_
     }_x000D_
};_x000D_
_x000D_
const uniqueLanguages = [];_x000D_
_x000D_
while (true) {_x000D_
  const result = await es.search(body);_x000D_
_x000D_
  const currentUniqueLangs = result.aggregations.langs.buckets.map(bucket => bucket.key);_x000D_
_x000D_
  uniqueLanguages.push(...currentUniqueLangs);_x000D_
_x000D_
  const after = result.aggregations.langs.after_key;_x000D_
_x000D_
  if (after) {_x000D_
      // continue paginating unique items_x000D_
      body.aggs.langs.composite.after = after;_x000D_
  } else {_x000D_
      break;_x000D_
  }_x000D_
}_x000D_
_x000D_
console.log(uniqueLanguages);
_x000D_
_x000D_
_x000D_