I have the following table in my database:
id | amount_cents | date | category
-------+--------------+---------------------+---------------
45714 | -799 | 2024-10-01 00:00:00 | indifferent
27638 | -6500 | 2024-10-01 00:00:00 | bad
27636 | -6504 | 2024-10-01 00:00:00 | good
40488 | -532 | 2024-10-01 00:00:00 | bad
40788 | -89300 | 2024-10-02 00:00:00 | good
40789 | -93830 | 2024-10-02 00:00:00 | indifferent
50087 | -5748 | 2024-10-02 00:00:00 | bad
49360 | -775 | 2024-10-02 00:00:00 | good
22337 | -16000 | 2023-10-02 00:00:00 | good
I'm trying to return a data set similar to the following, where the total amounts and percentages for each category
are calculated for each day:
date_trunc | good_count | good_amount_cents | good_percentage_count | good_percentage_amount | bad_count | bad_amount_cents | bad_percentage_count | bad_percentage_amount | indifferent_count | indifferent_amount_cents | indifferent_percentage_count | indifferent_percentage_amount
---------------------+------------+-------------------+-----------------------+------------------------+-----------+------------------+----------------------+-----------------------+-------------------+--------------------------+------------------------------+-------------------------------
2023-10-01 00:00:00 | 1 | -6504 | 0.250 | 0.453 | 2 | -7032 | 0.500 | 0.490 | 1 | -799 | 0.500 | 0.055
2023-10-02 00:00:00 | 3 | -106075 | 0.600 | 0.515 | 1 | -5748 | 0.200 | 0.027 | 1 | -93830 | 0.200 | 0.456
So far I've got as far as getting the total count of transactions grouped by date
:
SELECT DATE_TRUNC('day', date), count(id) FROM transactions WHERE category IS NOT NULL GROUP BY DATE_TRUNC('day', date);
date_trunc | count
---------------------+-------
2024-10-01 00:00:00 | 8
2024-10-02 00:00:00 | 19
How do I extend that to then (a) group by the various category
, (b) sum the amounts_cent
for each category and then calculate the percentage_count
and percentage_amount
for each category
in each date
?
I have the following table in my database:
id | amount_cents | date | category
-------+--------------+---------------------+---------------
45714 | -799 | 2024-10-01 00:00:00 | indifferent
27638 | -6500 | 2024-10-01 00:00:00 | bad
27636 | -6504 | 2024-10-01 00:00:00 | good
40488 | -532 | 2024-10-01 00:00:00 | bad
40788 | -89300 | 2024-10-02 00:00:00 | good
40789 | -93830 | 2024-10-02 00:00:00 | indifferent
50087 | -5748 | 2024-10-02 00:00:00 | bad
49360 | -775 | 2024-10-02 00:00:00 | good
22337 | -16000 | 2023-10-02 00:00:00 | good
I'm trying to return a data set similar to the following, where the total amounts and percentages for each category
are calculated for each day:
date_trunc | good_count | good_amount_cents | good_percentage_count | good_percentage_amount | bad_count | bad_amount_cents | bad_percentage_count | bad_percentage_amount | indifferent_count | indifferent_amount_cents | indifferent_percentage_count | indifferent_percentage_amount
---------------------+------------+-------------------+-----------------------+------------------------+-----------+------------------+----------------------+-----------------------+-------------------+--------------------------+------------------------------+-------------------------------
2023-10-01 00:00:00 | 1 | -6504 | 0.250 | 0.453 | 2 | -7032 | 0.500 | 0.490 | 1 | -799 | 0.500 | 0.055
2023-10-02 00:00:00 | 3 | -106075 | 0.600 | 0.515 | 1 | -5748 | 0.200 | 0.027 | 1 | -93830 | 0.200 | 0.456
So far I've got as far as getting the total count of transactions grouped by date
:
SELECT DATE_TRUNC('day', date), count(id) FROM transactions WHERE category IS NOT NULL GROUP BY DATE_TRUNC('day', date);
date_trunc | count
---------------------+-------
2024-10-01 00:00:00 | 8
2024-10-02 00:00:00 | 19
How do I extend that to then (a) group by the various category
, (b) sum the amounts_cent
for each category and then calculate the percentage_count
and percentage_amount
for each category
in each date
?
3 Answers
Reset to default 5We can use conditional aggregation here:
SELECT
"date"::date,
COUNT(*) FILTER (WHERE category = 'good') AS good_count,
SUM(amount_cents) FILTER (WHERE category = 'good') AS good_amount_cents,
1.0 * COUNT(*) FILTER (WHERE category = 'good') / COUNT(*) AS good_percentage_count,
1.0 * SUM(amount_cents) FILTER (WHERE category = 'good') / SUM(amount_cents) AS good_percentage_amount,
COUNT(*) FILTER (WHERE category = 'bad') AS bad_count,
SUM(amount_cents) FILTER (WHERE category = 'bad') AS bad_amount_cents,
1.0 * COUNT(*) FILTER (WHERE category = 'bad') / COUNT(*) AS bad_percentage_count,
1.0 * SUM(amount_cents) FILTER (WHERE category = 'bad') / SUM(amount_cents) AS bad_percentage_amount,
COUNT(*) FILTER (WHERE category = 'indifferent') AS indifferent_count,
SUM(amount_cents) FILTER (WHERE category = 'indifferent') AS indifferent_amount_cents,
1.0 * COUNT(*) FILTER (WHERE category = 'indifferent') / COUNT(*) AS indifferent_percentage_count,
1.0 * SUM(amount_cents) FILTER (WHERE category = 'indifferent') / SUM(amount_cents) AS indifferent_percentage_amount
FROM transactions
GROUP BY "date"::date
ORDER BY "date"::date;
As you are applying identical transformations to each one of the 3 categories,
you could have generic steps on categories,
before the final query that dispatches each set of category-specific values to a set of columns:
with
bycat as (select date_trunc('day', date) "day", category, count(*) n, sum(amount_cents) amount from t group by 1, 2),
counts as
(
select
*,
1.0 * n / sum(n) over (partition by day) percentage,
1.0 * amount / sum(amount) over (partition by day) amount_percentage
from bycat
)
select
day
,sum(case when category = 'good' then n else 0 end) good_count
,sum(case when category = 'good' then amount else 0 end) good_amount_cents
,sum(case when category = 'good' then percentage else 0 end) good_percentage_count
,sum(case when category = 'good' then amount_percentage else 0 end) good_percentage_amount
,sum(case when category = 'bad' then n else 0 end) bad_count
,sum(case when category = 'bad' then amount else 0 end) bad_amount_cents
,sum(case when category = 'bad' then percentage else 0 end) bad_percentage_count
,sum(case when category = 'bad' then amount_percentage else 0 end) bad_percentage_amount
,sum(case when category = 'indifferent' then n else 0 end) indifferent_count
,sum(case when category = 'indifferent' then amount else 0 end) indifferent_amount_cents
,sum(case when category = 'indifferent' then percentage else 0 end) indifferent_percentage_count
,sum(case when category = 'indifferent' then amount_percentage else 0 end) indifferent_percentage_amount
from counts
group by 1 order by 1;
See the complete SQLFiddle
To avoid copy-paste errors (… as I first did in my original answer),
particularly if the categories number tends to increase,
you can use a hand-made crosstab that will autogenerate the column names.
As Postgresql crosstab does not allow dynamic column names,
this will be done by generating a function that generates the results:
create or replace function generate() returns text language plpgsql as
$generate$
declare
selsql text;
retsql text;
begin
select string_agg(_retsql, ', '), string_agg(_selsql, E'\n')
into retsql, selsql
from
(
select
category||'_count int, '||category||'_amount int, '||category||'_percentage float, '||category||'_amount_percentage float' _retsql,
$$
,sum(case when category = '$$||category||$$' then n else 0 end)
,sum(case when category = '$$||category||$$' then amount else 0 end)
,sum(case when category = '$$||category||$$' then percentage else 0 end)
,sum(case when category = '$$||category||$$' then amount_percentage else 0 end)
$$ _selsql
from t group by category order by case category when 'good' then 0 when 'bad' then 1 else 2 end
) t;
execute format
(
$generated$
create or replace function crosstab_good_bad()
returns table(day timestamp, %s)
language sql
as
$$
with
bycat as (select date_trunc('day', date) "day", category, count(*) n, sum(amount_cents) amount from t group by 1, 2),
counts as
(
select
*,
1.0 * n / sum(n) over (partition by day) percentage,
1.0 * amount / sum(amount) over (partition by day) amount_percentage
from bycat
)
select
day
%s
from counts
group by 1 order by 1;
$$;
$generated$,
retsql,
selsql
);
return 'crosstab_good_bad';
end;
$generate$;
select * from generate();
select * from crosstab_good_bad();
See it in action.