I am trying to use the LISTAGG
function in Oracle. I would like to get only the distinct values for that column. Is there a way in which I can get only the distinct values without creating a function or a procedure?
col1 col2 Created_by 1 2 Smith 1 2 John 1 3 Ajay 1 4 Ram 1 5 Jack
I need to select col1 and the LISTAGG
of col2 (column 3 is not considered). When I do that, I get something like this as the result of LISTAGG
: [2,2,3,4,5]
I need to remove the duplicate '2' here; I need only the distinct values of col2 against col1.
This question is related to
sql
oracle
aggregate-functions
listagg
Has anyone thought of using a PARTITION BY clause? It worked for me in this query to get a list of application services and the access.
-- One row per application service with a comma-separated list of its access modes.
-- GROUP BY first collapses repeated (service, mode) pairs, the analytic LISTAGG
-- then builds the list per service, and DISTINCT removes the repeated result rows.
SELECT DISTINCT
    acl.APP_SVC_ID,
    LISTAGG(RTRIM(acl.ACCESS_MODE), ',')
        WITHIN GROUP (ORDER BY acl.ACCESS_MODE)
        OVER (PARTITION BY acl.APP_SVC_ID) AS ACCESS_MODE
FROM APP_SVC_ACCESS_CNTL acl
GROUP BY
    acl.APP_SVC_ID,
    acl.ACCESS_MODE
I had to cut out my where clause for NDA, but you get the idea.
Use listagg_clob function created like this:
-- Package holding the separator string used by the listagg_clob aggregate.
create or replace package list_const_p
as
  list_sep varchar2(10) := ',';
end list_const_p;
/
sho err
-- Aggregation context for the listagg_clob user-defined aggregate.
--   v_liststring : pending values (each followed by the separator) not yet written to the LOB
--   v_clob       : accumulated output once the pending buffer has overflowed
--   v_templob    : 0 until a temporary LOB has been created for v_clob, then 1
create type listagg_clob_t as object(
v_liststring varchar2(32767),
v_clob clob,
v_templob number,
-- Creates the initial (empty) context.
static function ODCIAggregateInitialize(
sctx IN OUT listagg_clob_t
) return number,
-- Folds one input value into the context.
member function ODCIAggregateIterate(
self IN OUT listagg_clob_t, value IN varchar2
) return number,
-- Produces the final CLOB result from the context.
member function ODCIAggregateTerminate(
self IN OUT listagg_clob_t, returnValue OUT clob, flags IN number
) return number,
-- Combines a second context (ctx2) into this one.
member function ODCIAggregateMerge(
self IN OUT listagg_clob_t, ctx2 IN OUT listagg_clob_t
) return number
);
/
sho err
create or replace type body listagg_clob_t is

  -- Start every aggregation with an empty buffer and no temporary LOB.
  static function ODCIAggregateInitialize(sctx IN OUT listagg_clob_t)
  return number is
  begin
    sctx := listagg_clob_t('', '', 0);
    return ODCIConst.Success;
  end;

  -- Add one value, followed by list_const_p.list_sep, to the aggregation.
  -- Values are collected in v_liststring; once the buffer plus the new value
  -- would exceed 4000 bytes the buffer is flushed to the temporary CLOB.
  member function ODCIAggregateIterate(
    self  IN OUT listagg_clob_t,
    value IN varchar2
  ) return number is
  begin
    if nvl(lengthb(self.v_liststring), 0) + nvl(lengthb(value), 0) <= 4000 then
      self.v_liststring := self.v_liststring || value || list_const_p.list_sep;
    else
      if self.v_templob = 0 then
        dbms_lob.createtemporary(self.v_clob, true, dbms_lob.call);
        self.v_templob := 1;
      end if;
      -- dbms_lob.writeappend rejects a NULL amount, which is what length()
      -- returns for an empty buffer (e.g. when the very first value is
      -- already longer than 4000 bytes), so only flush a non-empty buffer.
      if self.v_liststring is not null then
        dbms_lob.writeappend(self.v_clob, length(self.v_liststring), self.v_liststring);
      end if;
      self.v_liststring := value || list_const_p.list_sep;
    end if;
    return ODCIConst.Success;
  end;

  -- Return the aggregated list without its trailing separator.
  member function ODCIAggregateTerminate(
    self        IN OUT listagg_clob_t,
    returnValue OUT clob,
    flags       IN number
  ) return number is
    -- The separator is configurable (up to 10 characters), so strip exactly
    -- its length instead of assuming a single character.
    sep_len constant pls_integer := nvl(length(list_const_p.list_sep), 0);
  begin
    if self.v_templob != 0 then
      -- The buffer may be empty after a merge flushed it; skip the append then.
      if self.v_liststring is not null then
        dbms_lob.writeappend(self.v_clob, length(self.v_liststring), self.v_liststring);
      end if;
      if sep_len > 0 and dbms_lob.getlength(self.v_clob) >= sep_len then
        dbms_lob.trim(self.v_clob, dbms_lob.getlength(self.v_clob) - sep_len);
      end if;
    else
      self.v_clob := substr(self.v_liststring, 1, length(self.v_liststring) - sep_len);
    end if;
    returnValue := self.v_clob;
    return ODCIConst.Success;
  end;

  -- Fold the partial result held in ctx2 into self.
  member function ODCIAggregateMerge(
    self IN OUT listagg_clob_t,
    ctx2 IN OUT listagg_clob_t
  ) return number is
  begin
    -- Step 1: combine the CLOB parts (if ctx2 has one).
    if ctx2.v_templob != 0 then
      if self.v_templob != 0 then
        dbms_lob.append(self.v_clob, ctx2.v_clob);
        dbms_lob.freetemporary(ctx2.v_clob);
        ctx2.v_templob := 0;
      else
        -- self has no LOB yet: take over the one owned by ctx2.
        self.v_clob := ctx2.v_clob;
        self.v_templob := 1;
        ctx2.v_clob := '';
        ctx2.v_templob := 0;
      end if;
    end if;

    -- Step 2: combine the pending string buffers.
    if nvl(lengthb(self.v_liststring), 0) + nvl(lengthb(ctx2.v_liststring), 0) <= 4000 then
      self.v_liststring := self.v_liststring || ctx2.v_liststring;
      ctx2.v_liststring := '';
    else
      if self.v_templob = 0 then
        dbms_lob.createtemporary(self.v_clob, true, dbms_lob.call);
        self.v_templob := 1;
      end if;
      -- Either buffer can be empty here (the other one alone may exceed the
      -- threshold); writeappend must not be called with a NULL amount.
      if self.v_liststring is not null then
        dbms_lob.writeappend(self.v_clob, length(self.v_liststring), self.v_liststring);
      end if;
      if ctx2.v_liststring is not null then
        dbms_lob.writeappend(self.v_clob, length(ctx2.v_liststring), ctx2.v_liststring);
      end if;
      self.v_liststring := '';
      ctx2.v_liststring := '';
    end if;
    return ODCIConst.Success;
  end;

end;
/
sho err
-- SQL-callable aggregate returning a CLOB; its callbacks are implemented by listagg_clob_t.
create or replace function listagg_clob (input varchar2)
  return clob
  parallel_enable
  aggregate using listagg_clob_t;
/
sho err
Here's how to solve your issue.
-- Collapse each run of identical neighbouring items in a comma-separated list.
SELECT REGEXP_REPLACE(
           '2,2,2.1,3,3,3,3,4,4',
           '([^,]+)(,\1)*(,|$)',
           '\1\3'
       )
FROM dual
returns
2,2.1,3,4
From Oracle 19c it is built in; see here.
For 18c and earlier, try WITHIN GROUP; see here.
Otherwise, use regular expressions.
ANSWER below:
-- Distinct col2 values per col1: LISTAGG sorts the values so duplicates are
-- adjacent, then the regular expression collapses each run into one item.
select col1,
       regexp_replace(
           listagg(col2, ',') within group (order by col2),  -- sorted
           '([^,]+)(,\1)*(,|$)',
           '\1\3'
       )
from tableX
where rn = 1  -- NOTE(review): assumes tableX exposes an rn column; drop this filter if it does not
group by col1;
Note: The above will work in most cases - list should be sorted , you may have to trim all trailing and leading space depending on your data.
If you have a lot of items in a group (> 20) or big string sizes, you might run into the Oracle string size limit: 'result of string concatenation is too long'.
From oracle 12cR2 you can suppress this error see here. Alternatively put a max number on the members in each group. This will only work if its ok to list only the first members. If you have very long variable strings this may not work. you will have to experiment.
-- Same de-duplication as above, but groups with 100 or more non-null col2 values
-- get a placeholder text instead of a list.
SELECT
    col1,
    CASE
        WHEN COUNT(col2) < 100 THEN
            REGEXP_REPLACE(
                LISTAGG(col2, ',') WITHIN GROUP (ORDER BY col2),
                '([^,]+)(,\1)*(,|$)',
                '\1\3'
            )
        ELSE
            'Too many entries to list...'
    END
FROM sometable
WHERE rn = 1
GROUP BY col1;
Another solution (not so simple) to hopefully avoid oracle string size limit - string size is limited to 4000. Thanks to this post here by user3465996
-- Builds the list through XMLAGG (a CLOB) and de-duplicates it with a regular
-- expression. Read the nesting from the inside out (steps 1-5).
SELECT
    col1,
    DBMS_XMLGEN.CONVERT(                          -- 5. decode the XML entities
        DBMS_LOB.SUBSTR(                          -- 4. keep at most 4000 characters, starting at 1
            LTRIM(                                -- 3. remove the leading comma(s)
                REGEXP_REPLACE(                   -- 2. collapse runs of identical neighbours
                    REPLACE(
                        REPLACE(                  -- 1. turn <A>x</A><A>y</A> into ,x,y
                            XMLAGG(XMLELEMENT("A", col2) ORDER BY col2).getClobVal(),
                            '<A>', ','),
                        '</A>', ''),
                    '([^,]+)(,\1)*(,|$)', '\1\3'),
                ','),
            4000, 1),
        1) AS col2
FROM sometable
WHERE rn = 1
GROUP BY col1;
V1 - some test cases - FYI
regexp_replace('2,2,2.1,3,3,4,4','([^,]+)(,\1)+', '\1')
-> 2.1,3,4 Fail
regexp_replace('2 ,2 ,2.1,3 ,3 ,4 ,4 ','([^,]+)(,\1)+', '\1')
-> 2 ,2.1,3,4 Success - fixed length items
V2 -items contained within items eg. 2,21
regexp_replace('2.1,1','([^,]+)(,\1)+', '\1')
-> 2.1 Fail
regexp_replace('2 ,2 ,2.1,1 ,3 ,4 ,4 ','(^|,)(.+)(,\2)+', '\1\2')
-> 2 ,2.1,1 ,3 ,4 -- success - NEW regex
regexp_replace('a,b,b,b,b,c','(^|,)(.+)(,\2)+', '\1\2')
-> a,b,b,c fail!
v3 - regex thank Igor! works all cases.
-- Three sample inputs for the v3 expression; the expected output follows each call.
SELECT
    REGEXP_REPLACE('2,2,2.1,3,3,4,4', '([^,]+)(,\1)*(,|$)', '\1\3'),
    -- expected: 2,2.1,3,4
    REGEXP_REPLACE('2.1,1', '([^,]+)(,\1)*(,|$)', '\1\3'),
    -- expected: 2.1,1
    REGEXP_REPLACE('a,b,b,b,b,c', '([^,]+)(,\1)*(,|$)', '\1\3')
    -- expected: a,b,c
FROM dual
I think this could help - CASE the columns value to NULL if it's duplicate - then it's not appended to LISTAGG string:
-- Sample rows; they are all different, so UNION ALL is enough (no de-dup pass needed).
with test_data as
(
  select 1 as col1, 2 as col2, 'Smith' as created_by from dual
  union all select 1, 2, 'John' from dual
  union all select 1, 3, 'Ajay' from dual
  union all select 1, 4, 'Ram' from dual
  union all select 1, 5, 'Jack' from dual
  union all select 2, 5, 'Smith' from dual
  union all select 2, 6, 'John' from dual
  union all select 2, 6, 'Ajay' from dual
  union all select 2, 6, 'Ram' from dual
  union all select 2, 7, 'Jack' from dual
)
select col1,
       listagg(col2, ',') within group (order by col2 asc) as orig_value,
       -- Only the first row of every (col1, col2) pair keeps its value; the
       -- others become NULL, and LISTAGG skips NULLs.
       listagg(case when rwn = 1 then col2 end, ',') within group (order by col2 asc) as distinct_value
from
(
  select row_number() over (partition by col1, col2 order by 1) as rwn,
         a.*
  from test_data a
) a
group by col1
order by col1  -- make the row order of the result deterministic
Results in:
COL1 ORIG DISTINCT
1 2,2,3,4,5 2,3,4,5
2 5,6,6,6,7 5,6,7
19c and later:
-- Oracle 19c and later: DISTINCT is accepted inside LISTAGG.
SELECT
    LISTAGG(DISTINCT the_column, ',') WITHIN GROUP (ORDER BY the_column)
FROM the_table
18c and earlier:
-- Oracle 18c and earlier: reduce to the distinct values first, then aggregate.
WITH distinct_values AS (
    SELECT DISTINCT the_column
    FROM the_table
)
SELECT
    LISTAGG(the_column, ',') WITHIN GROUP (ORDER BY the_column)
FROM distinct_values
If you need more columns, something like this might be what you are looking for:
-- Keep one row per (col1, col2) pair, then aggregate col2 per col1.
-- The inline view carries no ORDER BY: row order inside it has no effect on
-- the outer GROUP BY / LISTAGG and would only cost an extra sort.
select col1,
       listagg(col2, ',') within group (order by col2)
from (
    select col1,
           col2,
           row_number() over (partition by col1, col2 order by col1) as rn
    from foo
)
where rn = 1
group by col1;
One annoying aspect with LISTAGG
is that if the total length of concatenated string exceeds 4000 characters( limit for VARCHAR2
in SQL ), the below error is thrown, which is difficult to manage in Oracle versions upto 12.1
ORA-01489: result of string concatenation is too long
A new feature added in 12cR2 is the ON OVERFLOW
clause of LISTAGG
.
The query including this clause would look like:
-- DESC is a reserved word in Oracle and cannot be used as an unquoted column
-- name or alias, so the column is called "description" here.
SELECT pid,
       LISTAGG(description, ' ' ON OVERFLOW TRUNCATE) WITHIN GROUP (ORDER BY seq) AS description
FROM B
GROUP BY pid;
The above will restrict the output to 4000 characters but will not throw the ORA-01489
error.
These are some of the additional options of ON OVERFLOW
clause:
ON OVERFLOW TRUNCATE 'Contd..'
: This will display 'Contd..' at the end of the string (the default is '...').
ON OVERFLOW TRUNCATE ''
: This will display the 4000 characters without any terminating string.
ON OVERFLOW TRUNCATE WITH COUNT
: This will display the number of values that were left out, at the end after the terminating characters. E.g. '...(5512)'
ON OVERFLOW ERROR
: Use this if you expect the LISTAGG to fail with the ORA-01489 error (which is the default anyway).

The simplest way to handle multiple listagg's is to use 1 WITH (subquery factor) per column containing a listagg of that column from a select distinct:
-- Sample data (column names are taken from the first branch of the UNION ALL).
WITH tab AS
(
    SELECT 1 AS col1, 2 AS col2, 3 AS col3, 'Smith' AS created_by FROM dual
    UNION ALL SELECT 1, 2, 3, 'John' FROM dual
    UNION ALL SELECT 1, 3, 4, 'Ajay' FROM dual
    UNION ALL SELECT 1, 4, 4, 'Ram' FROM dual
    UNION ALL SELECT 1, 5, 6, 'Jack' FROM dual
)
-- Distinct col2 values per col1, as one list.
, getCol2 AS
(
    SELECT col1,
           LISTAGG(col2, ',') WITHIN GROUP (ORDER BY col2) AS col2List
    FROM (SELECT DISTINCT col1, col2 FROM tab)
    GROUP BY col1
)
-- Distinct col3 values per col1, as one list.
, getCol3 AS
(
    SELECT col1,
           LISTAGG(col3, ',') WITHIN GROUP (ORDER BY col3) AS col3List
    FROM (SELECT DISTINCT col1, col3 FROM tab)
    GROUP BY col1
)
SELECT col1,
       col2List,
       col3List
FROM getCol2
JOIN getCol3
    USING (col1)
Which gives:
col1 col2List col3List
1 2,3,4,5 3,4,6
I wrote a function to handle this using regular expressions. The in parameters are: 1) the listagg call itself 2) A repeat of the delimiter
-- Removes duplicate items from a delimited list.
--
-- Parameters:
--   listagg_in   - the delimited list, typically the result of a LISTAGG call
--   delimiter_in - the delimiter that was passed to LISTAGG
-- Returns:
--   the list with every item kept once, in order of first appearance, joined
--   by delimiter_in; listagg_in unchanged when it or the delimiter is NULL.
--
-- The list is split on the literal delimiter instead of pushing the delimiter
-- into a regular expression. That way multi-character delimiters such as ', ',
-- delimiters that are regex metacharacters ('|', '.'), values containing one
-- of the delimiter characters, and unsorted input are all handled.
create or replace function distinct_listagg
  (listagg_in   varchar2,
   delimiter_in varchar2)
  return varchar2
as
  type seen_items_t is table of boolean index by varchar2(32767);

  seen_items  seen_items_t;                 -- items already written to the result
  hold_result varchar2(32767);
  item        varchar2(32767);
  item_start  pls_integer := 1;             -- position where the current item begins
  delim_pos   pls_integer;                  -- position of the next delimiter, 0 if none
  delim_len   constant pls_integer := nvl(length(delimiter_in), 0);
begin
  if listagg_in is null or delim_len = 0 then
    return listagg_in;
  end if;

  loop
    delim_pos := instr(listagg_in, delimiter_in, item_start);

    if delim_pos = 0 then
      item := substr(listagg_in, item_start);                          -- last item
    else
      item := substr(listagg_in, item_start, delim_pos - item_start);
    end if;

    -- Empty items (e.g. from a trailing delimiter) are skipped.
    if item is not null and not seen_items.exists(item) then
      seen_items(item) := true;
      if hold_result is null then
        hold_result := item;
      else
        hold_result := hold_result || delimiter_in || item;
      end if;
    end if;

    exit when delim_pos = 0;
    item_start := delim_pos + delim_len;
  end loop;

  return hold_result;
end;
Now you don't have to repeat the regular expression every time you do this, simply say:
-- Order by the aggregated column itself: inside WITHIN GROUP, "order by 1"
-- sorts by the constant 1 (not by column position), which leaves the list
-- unsorted, so equal values are not guaranteed to sit next to each other.
select distinct_listagg(
         listagg(myfield, ', ') within group (order by myfield),
         ', '
       )
from mytable;
listagg() ignores NULL values, so in a first step you could use the lag() function to analyse whether the previous record had the same value, if yes then NULL, else 'new value'.
-- Sample data (column names are taken from the first branch of the UNION ALL).
WITH tab AS
(
    SELECT 1 AS col1, 2 AS col2, 'Smith' AS created_by FROM dual
    UNION ALL SELECT 1, 2, 'John' FROM dual
    UNION ALL SELECT 1, 3, 'Ajay' FROM dual
    UNION ALL SELECT 1, 4, 'Ram' FROM dual
    UNION ALL SELECT 1, 5, 'Jack' FROM dual
)
SELECT
    col1,
    -- NULL when the previous row (in col2 order) has the same col2, else col2.
    NULLIF(col2, LAG(col2) OVER (ORDER BY col2)) AS col2_with_nulls,
    created_by
FROM tab;
Results
COL1 COL2_WITH_NULLS CREAT
---------- --------------- -----
1 2 Smith
1 John
1 3 Ajay
1 4 Ram
1 5 Jack
Note that the second 2 is replaced by NULL. Now you can wrap a SELECT with the listagg() around it.
-- Sample data (column names are taken from the first branch of the UNION ALL).
WITH tab AS
(
    SELECT 1 AS col1, 2 AS col2, 'Smith' AS created_by FROM dual
    UNION ALL SELECT 1, 2, 'John' FROM dual
    UNION ALL SELECT 1, 3, 'Ajay' FROM dual
    UNION ALL SELECT 1, 4, 'Ram' FROM dual
    UNION ALL SELECT 1, 5, 'Jack' FROM dual
)
-- Repeated col2 values are replaced by NULL so that LISTAGG skips them.
, marked AS
(
    SELECT
        col1,
        NULLIF(col2, LAG(col2) OVER (ORDER BY col2)) AS col2_with_nulls,
        created_by
    FROM tab
)
SELECT
    LISTAGG(col2_with_nulls, ',') WITHIN GROUP (ORDER BY col2_with_nulls) col2_list
FROM marked;
Result
COL2_LIST
---------
2,3,4,5
You can do this over multiple columns too.
-- Sample data (column names are taken from the first branch of the UNION ALL).
WITH tab AS
(
    SELECT 1 AS col1, 2 AS col2, 'Smith' AS created_by FROM dual
    UNION ALL SELECT 1, 2, 'John' FROM dual
    UNION ALL SELECT 1, 3, 'Ajay' FROM dual
    UNION ALL SELECT 1, 4, 'Ram' FROM dual
    UNION ALL SELECT 1, 5, 'Jack' FROM dual
)
-- Each column gets its own LAG, ordered by that column, so repeats become NULL.
, marked AS
(
    SELECT
        NULLIF(col1, LAG(col1) OVER (ORDER BY col1)) AS col1_with_nulls,
        NULLIF(col2, LAG(col2) OVER (ORDER BY col2)) AS col2_with_nulls,
        created_by
    FROM tab
)
SELECT
    LISTAGG(col1_with_nulls, ',') WITHIN GROUP (ORDER BY col1_with_nulls) col1_list,
    LISTAGG(col2_with_nulls, ',') WITHIN GROUP (ORDER BY col2_with_nulls) col2_list,
    LISTAGG(created_by, ',') WITHIN GROUP (ORDER BY created_by) created_by_list
FROM marked;
Result
COL1_LIST COL2_LIST CREATED_BY_LIST
--------- --------- -------------------------
1 2,3,4,5 Ajay,Jack,John,Ram,Smith
I implemented this stored function :
-- Argument bundle for the aggregate: ELEMENTO is the value to aggregate,
-- SEPARATORE is the separator placed between values in the result.
CREATE TYPE LISTAGG_DISTINCT_PARAMS AS OBJECT (ELEMENTO VARCHAR2(2000), SEPARATORE VARCHAR2(10));
-- Collection type that holds the values gathered during aggregation.
CREATE TYPE T_LISTA_ELEMENTI AS TABLE OF VARCHAR2(2000);
-- Aggregation context of the LISTAGG_DISTINCT user-defined aggregate.
--   LISTA_ELEMENTI : the values collected so far
--   SEPARATORE     : the separator used when the result string is built
CREATE TYPE T_LISTAGG_DISTINCT AS OBJECT (
LISTA_ELEMENTI T_LISTA_ELEMENTI,
SEPARATORE VARCHAR2(10),
-- Creates the initial context.
STATIC FUNCTION ODCIAGGREGATEINITIALIZE(SCTX IN OUT T_LISTAGG_DISTINCT)
RETURN NUMBER,
-- Folds one input (value + separator) into the context.
MEMBER FUNCTION ODCIAGGREGATEITERATE (SELF IN OUT T_LISTAGG_DISTINCT,
VALUE IN LISTAGG_DISTINCT_PARAMS )
RETURN NUMBER,
-- Builds the final string from the context.
MEMBER FUNCTION ODCIAGGREGATETERMINATE (SELF IN T_LISTAGG_DISTINCT,
RETURN_VALUE OUT VARCHAR2,
FLAGS IN NUMBER )
RETURN NUMBER,
-- Combines a second context (CTX2) into this one.
MEMBER FUNCTION ODCIAGGREGATEMERGE (SELF IN OUT T_LISTAGG_DISTINCT,
CTX2 IN T_LISTAGG_DISTINCT )
RETURN NUMBER
);
CREATE OR REPLACE TYPE BODY T_LISTAGG_DISTINCT IS

  -- Start with an empty element list and ',' as the separator.
  STATIC FUNCTION ODCIAGGREGATEINITIALIZE(SCTX IN OUT T_LISTAGG_DISTINCT) RETURN NUMBER IS
  BEGIN
    SCTX := T_LISTAGG_DISTINCT(T_LISTA_ELEMENTI() , ',');
    RETURN ODCICONST.SUCCESS;
  END;

  -- Add one non-NULL element to the list, keep the list free of duplicates
  -- and remember the separator that came with the element.
  MEMBER FUNCTION ODCIAGGREGATEITERATE(SELF IN OUT T_LISTAGG_DISTINCT, VALUE IN LISTAGG_DISTINCT_PARAMS) RETURN NUMBER IS
  BEGIN
    IF VALUE.ELEMENTO IS NOT NULL THEN
      SELF.LISTA_ELEMENTI.EXTEND;
      SELF.LISTA_ELEMENTI(SELF.LISTA_ELEMENTI.LAST) := TO_CHAR(VALUE.ELEMENTO);
      SELF.LISTA_ELEMENTI:= SELF.LISTA_ELEMENTI MULTISET UNION DISTINCT SELF.LISTA_ELEMENTI;
      SELF.SEPARATORE := VALUE.SEPARATORE;
    END IF;
    RETURN ODCICONST.SUCCESS;
  END;

  -- Build the result: distinct elements, sorted, joined by the separator and
  -- truncated to LUNGHEZZA_MAX characters (ending in '...') when too long.
  MEMBER FUNCTION ODCIAGGREGATETERMINATE(SELF IN T_LISTAGG_DISTINCT, RETURN_VALUE OUT VARCHAR2, FLAGS IN NUMBER) RETURN NUMBER IS
    STRINGA_OUTPUT CLOB:='';
    LISTA_OUTPUT T_LISTA_ELEMENTI;
    TERMINATORE VARCHAR2(3):='...';
    LUNGHEZZA_MAX NUMBER:=4000;
  BEGIN
    IF SELF.LISTA_ELEMENTI.EXISTS(1) THEN -- at least one element is in the list
      -- initialise a new working list
      LISTA_OUTPUT := T_LISTA_ELEMENTI();
      -- copy only the DISTINCT elements
      LISTA_OUTPUT := SELF.LISTA_ELEMENTI MULTISET UNION DISTINCT SELF.LISTA_ELEMENTI;
      -- sort the elements
      SELECT CAST(MULTISET(SELECT * FROM TABLE(LISTA_OUTPUT) ORDER BY 1 ) AS T_LISTA_ELEMENTI ) INTO LISTA_OUTPUT FROM DUAL;
      -- concatenate into one string
      FOR I IN LISTA_OUTPUT.FIRST .. LISTA_OUTPUT.LAST - 1
      LOOP
        STRINGA_OUTPUT := STRINGA_OUTPUT || LISTA_OUTPUT(I) || SELF.SEPARATORE;
      END LOOP;
      STRINGA_OUTPUT := STRINGA_OUTPUT || LISTA_OUTPUT(LISTA_OUTPUT.LAST);
      -- if the string exceeds the maximum size, truncate it and end it with the terminator
      IF LENGTH(STRINGA_OUTPUT) > LUNGHEZZA_MAX THEN
        RETURN_VALUE := SUBSTR(STRINGA_OUTPUT, 0, LUNGHEZZA_MAX - LENGTH(TERMINATORE)) || TERMINATORE;
      ELSE
        RETURN_VALUE:=STRINGA_OUTPUT;
      END IF;
    ELSE -- no element at all: return NULL
      RETURN_VALUE := NULL;
    END IF;
    RETURN ODCICONST.SUCCESS;
  END;

  -- Combine the partial result CTX2 into SELF. Without this step the elements
  -- collected in CTX2 would be lost whenever the aggregation is split into
  -- several contexts (for example under parallel execution).
  MEMBER FUNCTION ODCIAGGREGATEMERGE(SELF IN OUT T_LISTAGG_DISTINCT, CTX2 IN T_LISTAGG_DISTINCT) RETURN NUMBER IS
  BEGIN
    IF CTX2.LISTA_ELEMENTI IS NOT NULL AND CTX2.LISTA_ELEMENTI.COUNT > 0 THEN
      SELF.LISTA_ELEMENTI := SELF.LISTA_ELEMENTI MULTISET UNION DISTINCT CTX2.LISTA_ELEMENTI;
      -- CTX2 has seen at least one element, so its separator is the caller's.
      SELF.SEPARATORE := CTX2.SEPARATORE;
    END IF;
    RETURN ODCICONST.SUCCESS;
  END;

END; -- end of type body
-- SQL-callable aggregate; its callbacks are implemented by T_LISTAGG_DISTINCT.
CREATE FUNCTION LISTAGG_DISTINCT (INPUT LISTAGG_DISTINCT_PARAMS)
    RETURN VARCHAR2
    PARALLEL_ENABLE
    AGGREGATE USING T_LISTAGG_DISTINCT;
-- Example ("//" is not a comment marker in SQL, "--" is)
SELECT LISTAGG_DISTINCT(LISTAGG_DISTINCT_PARAMS(OWNER, ', ')) AS LISTA_OWNER
FROM SYS.ALL_OBJECTS;
I'm sorry, but in some case (for a very big set), Oracle could return this error:
Object or Collection value was too large. The size of the value
might have exceeded 30k in a SORT context, or the size might be
too big for available memory.
but I think this is a good point of start ;)
Using SELECT DISTINCT ...
as part of a Subquery before calling LISTAGG is probably the best way for simple queries, as noted by @a_horse_with_no_name
However, in more complex queries, it might not be possible, or easy, to accomplish this. I had this come up in a scenario that was using top-n approach using an analytic function.
So I found the COLLECT
aggregate function. It is documented to have the UNIQUE
or DISTINCT
modifier available. Only in 10g, it quietly fails (it ignores the modifier without error). However, to overcome this, from another answer, I came to this solution:
-- Skeleton only: every "..." stands for the parts of the real query that were left out.
SELECT
...
(
-- Turn the collection back into rows and aggregate them into one string.
SELECT LISTAGG(v.column_value,',') WITHIN GROUP (ORDER BY v.column_value)
FROM TABLE(columns_tab) v
) AS columns,
...
FROM (
SELECT
...
-- COLLECT gathers the values into a tab_typ collection; SET removes the duplicates.
SET(CAST(COLLECT(UNIQUE some_column ORDER BY some_column) AS tab_typ)) AS columns_tab,
...
)
Basically, by using SET
, I remove the duplicates in my collection.
You would still need to define the tab_typ
as a basic collection type, and in the case of a VARCHAR
, this would be for example:
-- Nested-table type used as the CAST target for COLLECT.
CREATE OR REPLACE TYPE tab_typ
    AS TABLE OF VARCHAR2(100)
/
Also as a correction to the answer from @a_horse_with_no_name on the multi column situation, where you might want to aggregate still on a third (or more) columns:
-- Distinct lists for col2 and col3 per col1, while col4 is still summed over
-- ALL rows of the group (no rn = 1 filter).
select
    col1,
    listagg(CASE rn2 WHEN 1 THEN col2 END, ',') within group (order by col2) AS col2_list,
    listagg(CASE rn3 WHEN 1 THEN col3 END, ',') within group (order by col3) AS col3_list,
    SUM(col4) AS col4
from (
    select
        col1,
        col2,
        col3,  -- needed by the outer query (col3_list)
        col4,  -- needed by the outer query (SUM)
        -- rn2 / rn3 are 1 for exactly one row of every (col1, col2) / (col1, col3) pair
        row_number() over (partition by col1, col2 order by null) as rn2,
        row_number() over (partition by col1, col3 order by null) as rn3
    from foo
)
group by col1;
If you would leave the rn = 1
as a where condition to the query, you would aggregate other columns incorrectly.
-- The function is LISTAGG (not "listaggr"); TABLE is a reserved word, so a placeholder name is used.
select col1, listagg(col2, ',') within group (order by col2)
from your_table
group by col1
This aggregates the strings (col2) into a list, keeping them in order, and GROUP BY col1 then merges the rows that share a col1 value into one group. Note that this by itself does not remove duplicate col2 values from the list. Perhaps this looks clean and simple, as it should be.
And in case you want col3 as well, you just need to add one more listagg(), that is: select col1, listagg(col2,',') within group(order by col2), listagg(col3,',') within group(order by col3) from your_table group by col1
What about creating a dedicated function that will make the "distinct" part :
-- Joins the distinct values of a collection into one string.
--
-- Parameters:
--   t   - collection of values (str_t)
--   sep - separator placed between the values, ',' by default
-- Returns:
--   the distinct values of t in ascending order, joined by sep
create or replace function listagg_distinct (t in str_t, sep IN VARCHAR2 DEFAULT ',')
return VARCHAR2
as
  l_rc VARCHAR2(4096) := '';
begin
  -- Order by the value itself: inside WITHIN GROUP, "ORDER BY 1" sorts by the
  -- constant 1 and therefore leaves the order of the result undefined.
  SELECT listagg(val, sep) WITHIN GROUP (ORDER BY val)
    INTO l_rc
    FROM (SELECT DISTINCT column_value val FROM table(t));
  RETURN l_rc;
end;
/
And then use it to do the aggregation :
-- Column names made consistent: the selected column must be the grouped one.
SELECT col1,
       listagg_distinct(cast(collect(col2) as str_t), ', ')
FROM your_table
GROUP BY col1;
Further refining @YoYo's correction to @a_horse_with_no_name's row_number() based approach using DECODE vs CASE (i saw here). I see that @Martin Vrbovsky also has this case approach answer.
-- Same idea as the CASE variant: the inline view blanks out every repeated
-- col2 / col3 value per col1, so the outer LISTAGG calls only see each value once.
select
    col1,
    listagg(col2, ',') within group (order by col2) AS col2_list,
    listagg(col3, ',') within group (order by col3) AS col3_list,
    SUM(col4) AS col4
from (
    select
        col1,
        -- DECODE returns the value for row number 1 and NULL for all other rows
        decode(row_number() over (partition by col1, col2 order by null),1,col2) as col2,
        decode(row_number() over (partition by col1, col3 order by null),1,col3) as col3,
        col4  -- needed by the outer SUM
    from foo
)
group by col1;
I needed a DISTINCT version of this and got this one working.
-- The LISTAGG call is spelled out (it was missing, leaving the parentheses
-- unbalanced). The pattern matches a whole item plus its repeats up to the
-- next ', ' or the end of the string, so an item that is only the beginning
-- of its neighbour (e.g. 2 and 21) is not treated as a duplicate.
RTRIM(REGEXP_REPLACE(
(LISTAGG(value, ', ') WITHIN GROUP( ORDER BY value)),
'([^,]+)(, \1)*(, |$)','\1\3'),', ')
Upcoming Oracle 19c will support DISTINCT
with LISTAGG
.
This feature is coming with 19c:
-- SQL*Plus prompt and line-number prefixes removed so the statement can be run as is.
select deptno, listagg(distinct sal, ', ') within group (order by sal)
from scott.emp
group by deptno;
EDIT:
The LISTAGG aggregate function now supports duplicate elimination by using the new DISTINCT keyword. The LISTAGG aggregate function orders the rows for each group in a query according to the ORDER BY expression and then concatenates the values into a single string. With the new DISTINCT keyword, duplicate values can be removed from the specified expression before concatenation into a single string. This removes the need to create complex query processing to find the distinct values prior to using the aggregate LISTAGG function. With the DISTINCT option, the processing to remove duplicate values can be done directly within the LISTAGG function. The result is simpler, faster, more efficient SQL.
Very simple - use in your query a sub-query with a select distinct:
-- Step 1: reduce the table to its distinct (question_id, element_id) pairs.
WITH distinct_pairs AS (
    SELECT DISTINCT
        question_id,
        element_id
    FROM YOUR_TABLE
)
-- Step 2: aggregate the element ids of every question.
SELECT
    question_id,
    LISTAGG(element_id, ',') WITHIN GROUP (ORDER BY element_id)
FROM distinct_pairs
GROUP BY question_id;
If you want distinct values across MULTIPLE columns, want control over sort order, don't want to use an undocumented function that may disappear, and do not want more than one full table scan, you may find this construct useful:
-- Sample data.
WITH test_data AS
(
    SELECT 'A' AS col1, 'T_a1' AS col2, '123' AS col3 FROM dual
    UNION SELECT 'A', 'T_a1', '456' FROM dual
    UNION SELECT 'A', 'T_a1', '789' FROM dual
    UNION SELECT 'A', 'T_a2', '123' FROM dual
    UNION SELECT 'A', 'T_a2', '456' FROM dual
    UNION SELECT 'A', 'T_a2', '111' FROM dual
    UNION SELECT 'A', 'T_a3', '999' FROM dual
    UNION SELECT 'B', 'T_a1', '123' FROM dual
    UNION SELECT 'B', 'T_b1', '740' FROM dual
    UNION SELECT 'B', 'T_b1', '846' FROM dual
)
SELECT
    col1,
    -- each collection is expanded again and joined in descending order
    (
        SELECT LISTAGG(column_value, ',') WITHIN GROUP (ORDER BY column_value DESC)
        FROM TABLE(collect_col2)
    ) AS col2s,
    (
        SELECT LISTAGG(column_value, ',') WITHIN GROUP (ORDER BY column_value DESC)
        FROM TABLE(collect_col3)
    ) AS col3s
FROM
(
    -- one pass over the data: distinct col2 and col3 values per col1
    SELECT
        col1,
        COLLECT(DISTINCT col2) AS collect_col2,
        COLLECT(DISTINCT col3) AS collect_col3
    FROM test_data
    GROUP BY col1
);
you can use undocumented wm_concat
function.
-- Relies on the undocumented wm_concat function.
SELECT
    col1,
    wm_concat(DISTINCT col2) col2_list
FROM tab1
GROUP BY col1;
this function returns clob column, if you want you can use dbms_lob.substr
to convert clob to varchar2.
If the intent is to apply this transformation to multiple columns, I have extended a_horse_with_no_name's solution:
-- Every inline view returns exactly one row (an aggregate without GROUP BY),
-- so cross joining them yields a single row with one list per column.
SELECT *
FROM       (SELECT LISTAGG(GRADE_LEVEL, ',') WITHIN GROUP (ORDER BY GRADE_LEVEL) "Grade Levels" FROM (SELECT DISTINCT GRADE_LEVEL FROM Students) t) t1
CROSS JOIN (SELECT LISTAGG(ENROLL_STATUS, ',') WITHIN GROUP (ORDER BY ENROLL_STATUS) "Enrollment Status" FROM (SELECT DISTINCT ENROLL_STATUS FROM Students) t) t2
CROSS JOIN (SELECT LISTAGG(GENDER, ',') WITHIN GROUP (ORDER BY GENDER) "Legal Gender Code" FROM (SELECT DISTINCT GENDER FROM Students) t) t3
CROSS JOIN (SELECT LISTAGG(CITY, ',') WITHIN GROUP (ORDER BY CITY) "City" FROM (SELECT DISTINCT CITY FROM Students) t) t4
CROSS JOIN (SELECT LISTAGG(ENTRYCODE, ',') WITHIN GROUP (ORDER BY ENTRYCODE) "Entry Code" FROM (SELECT DISTINCT ENTRYCODE FROM Students) t) t5
CROSS JOIN (SELECT LISTAGG(EXITCODE, ',') WITHIN GROUP (ORDER BY EXITCODE) "Exit Code" FROM (SELECT DISTINCT EXITCODE FROM Students) t) t6
CROSS JOIN (SELECT LISTAGG(LUNCHSTATUS, ',') WITHIN GROUP (ORDER BY LUNCHSTATUS) "Lunch Status" FROM (SELECT DISTINCT LUNCHSTATUS FROM Students) t) t7
CROSS JOIN (SELECT LISTAGG(ETHNICITY, ',') WITHIN GROUP (ORDER BY ETHNICITY) "Race Code" FROM (SELECT DISTINCT ETHNICITY FROM Students) t) t8
CROSS JOIN (SELECT LISTAGG(CLASSOF, ',') WITHIN GROUP (ORDER BY CLASSOF) "Expected Graduation Year" FROM (SELECT DISTINCT CLASSOF FROM Students) t) t9
CROSS JOIN (SELECT LISTAGG(TRACK, ',') WITHIN GROUP (ORDER BY TRACK) "Track Code" FROM (SELECT DISTINCT TRACK FROM Students) t) t10
CROSS JOIN (SELECT LISTAGG(GRADREQSETID, ',') WITHIN GROUP (ORDER BY GRADREQSETID) "Graduation ID" FROM (SELECT DISTINCT GRADREQSETID FROM Students) t) t11
CROSS JOIN (SELECT LISTAGG(ENROLLMENT_SCHOOLID, ',') WITHIN GROUP (ORDER BY ENROLLMENT_SCHOOLID) "School Key" FROM (SELECT DISTINCT ENROLLMENT_SCHOOLID FROM Students) t) t12
CROSS JOIN (SELECT LISTAGG(FEDETHNICITY, ',') WITHIN GROUP (ORDER BY FEDETHNICITY) "Federal Race Code" FROM (SELECT DISTINCT FEDETHNICITY FROM Students) t) t13
CROSS JOIN (SELECT LISTAGG(SUMMERSCHOOLID, ',') WITHIN GROUP (ORDER BY SUMMERSCHOOLID) "Summer School Key" FROM (SELECT DISTINCT SUMMERSCHOOLID FROM Students) t) t14
CROSS JOIN (SELECT LISTAGG(FEDRACEDECLINE, ',') WITHIN GROUP (ORDER BY FEDRACEDECLINE) "Student Decl to Prov Race Code" FROM (SELECT DISTINCT FEDRACEDECLINE FROM Students) t) t15
This is Oracle Database 11g Enterprise Edition Release 11.2.0.2.0 - 64bit Production.
I was unable to use STRAGG because there is no way to DISTINCT and ORDER.
Performance scales linearly, which is good, since I am adding all columns of interest. The above took 3 seconds for 77K rows. For just one rollup, .172 seconds. I do wish there was a way to distinctify multiple columns in a table in one pass.
To get around the string length issue you can use XMLAGG
which is similar to listagg
but it returns a clob.
You can then parse using regexp_replace
and get the unique values and then turn it back into a string using dbms_lob.substr()
. If you have a huge amount of distinct values you will still run out of space this way but for a lot of cases the code below should work.
You can also change the delimiters you use. In my case I wanted '-' instead of ',' but you should be able to replace the dashes in my code and use commas if you want that.
-- '-'-separated list of col2 per col1, built through XMLAGG (a CLOB).
-- Read the nesting from the inside out:
--   1. XMLAGG produces <A>x</A><A>y</A>...; the two REPLACE calls turn it into -x-y...
--   2. REGEXP_REPLACE collapses repeated neighbouring items
--   3. LTRIM removes the leading '-'
--   4. DBMS_LOB.SUBSTR keeps at most the first 4000 characters
select col1,
       dbms_lob.substr(
           ltrim(
               regexp_replace(
                   replace(
                       replace(
                           xmlagg(xmlelement("A", col2) order by col2).getClobVal(),
                           '<A>', '-'),
                       '</A>', ''),
                   '([^-]*)(-\1)+($|-)', '\1\3'),
               '-'),
           4000, 1) as platform_mix
from your_table   -- placeholder name: TABLE itself is a reserved word
group by col1     -- required: col1 is selected next to the XMLAGG aggregate
If you do not need a particular order of concatenated values, and the separator can be a comma, you can do:
-- your_table is a placeholder name: TABLE itself is a reserved word.
select col1,
       stragg(distinct col2)
from your_table
group by col1
You can do it via RegEx replacement. Here is an example:
-- Citations Per Year - Cited Publications main query. Includes list of unique associated core project numbers, ordered by core project number.
SELECT ptc.pmid AS pmid, ptc.pmc_id, ptc.pub_title AS pubtitle, ptc.author_list AS authorlist,
       ptc.pub_date AS pubdate,
       -- The project numbers are sorted, so duplicates are adjacent. The pattern
       -- matches one whole item plus all of its repeats up to the next comma or
       -- the end of the string; the earlier '(^|,)(.+)(,\2)+' pattern leaves
       -- duplicates behind for inputs such as a,b,b,b,b,c.
       REGEXP_REPLACE(
           LISTAGG(ppcc.admin_phs_org_code || TO_CHAR(ppcc.serial_num, 'FM000000'), ',')
               WITHIN GROUP (ORDER BY ppcc.admin_phs_org_code || TO_CHAR(ppcc.serial_num, 'FM000000')),
           '([^,]+)(,\1)*(,|$)', '\1\3')
           AS projectNum
FROM publication_total_citations ptc
INNER JOIN proj_paper_citation_counts ppcc
    ON ptc.pmid = ppcc.pmid
    AND ppcc.citation_year = 2013
INNER JOIN user_appls ua
    ON ppcc.admin_phs_org_code = ua.admin_phs_org_code
    AND ppcc.serial_num = ua.serial_num
    AND ua.login_id = 'EVANSF'
GROUP BY ptc.pmid, ptc.pmc_id, ptc.pub_title, ptc.author_list, ptc.pub_date
ORDER BY pmid;
Also posted here: Oracle - unique Listagg values
I overcame this issue by grouping on the values first, then do another aggregation with the listagg. Something like this:
-- Level 1 (inline view): one row per (a, b, c), which removes duplicate c values.
-- Level 2 (outer query): list the c values per (a, b).
-- The average of d is rebuilt from per-group sums and counts, because an
-- average of averages would weight every (a, b, c) group equally regardless
-- of how many rows it holds.
select a,
       b,
       listagg(c, ',') within group (order by c) as c,
       sum(d_sum) / nullif(sum(d_cnt), 0) as avg_d   -- NULL when no non-null d exists
from (
    select a,
           b,
           c,
           sum(d)   as d_sum,
           count(d) as d_cnt
    from your_table   -- placeholder name: TABLE itself is a reserved word
    group by a, b, c
)
group by a, b
only one full table access, relatively easy to expand to more complex queries
Source: Stackoverflow.com