I have this table in SQL that contains info on multiple hospital visits for each patient:
CREATE TABLE patient_visits (
patient_id INT,
hospital_visit_date DATE,
visit_num INT,
appointment_result VARCHAR(3),
PRIMARY KEY (patient_id, visit_num)
);
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1001, '2023-01-15', 1, 'yes'),
(1001, '2023-02-20', 2, 'no'),
(1001, '2023-04-10', 3, 'yes'),
(1001, '2023-07-05', 4, 'yes'),
(1001, '2023-11-22', 5, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1002, '2023-03-08', 1, 'yes'),
(1002, '2023-06-12', 2, 'yes'),
(1002, '2023-10-30', 3, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1003, '2023-02-05', 1, 'yes'),
(1003, '2023-08-18', 2, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1004, '2023-01-10', 1, 'no'),
(1004, '2023-03-25', 2, 'yes'),
(1004, '2023-05-14', 3, 'yes'),
(1004, '2023-09-07', 4, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1005, '2023-07-20', 1, 'yes');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1006, '2023-01-05', 1, 'yes'),
(1006, '2023-02-15', 2, 'yes'),
(1006, '2023-05-22', 3, 'no'),
(1006, '2023-08-30', 4, 'yes'),
(1006, '2023-12-15', 5, 'yes');
When displayed, the table looks like this:
patient_id hospital_visit_date visit_num appointment_result
1001 2023-01-15 1 yes
1001 2023-02-20 2 no
1001 2023-04-10 3 yes
1001 2023-07-05 4 yes
1001 2023-11-22 5 no
1002 2023-03-08 1 yes
1002 2023-06-12 2 yes
1002 2023-10-30 3 no
1003 2023-02-05 1 yes
1003 2023-08-18 2 no
1004 2023-01-10 1 no
1004 2023-03-25 2 yes
1004 2023-05-14 3 yes
1004 2023-09-07 4 no
1005 2023-07-20 1 yes
1006 2023-01-05 1 yes
1006 2023-02-15 2 yes
1006 2023-05-22 3 no
1006 2023-08-30 4 yes
1006 2023-12-15 5 yes
Based on this table, I want to answer the following questions:
I want to answer the following question:
- For patients who have exactly 1 appointment: what was the miss rate on their first appointment?
- For patients who have exactly 2 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment?
- For patients who have exactly 3 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment? what was the miss rate on their third appointment?
- For patients who have more than 3 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment? what was the miss rate on their third appointment? what was their miss rate on all other appointments (lumped into one)?
Currently, I am doing this manually, e.g.
WITH patient_appointment_counts AS (
SELECT patient_id, COUNT(*) as total_appointments
FROM patient_visits
GROUP BY patient_id
)
SELECT
pv.visit_num,
COUNT(CASE WHEN pv.appointment_result = 'no' THEN 1 END) * 100.0 / COUNT(*) as appointment_miss_rate
FROM patient_visits pv
JOIN patient_appointment_counts pac ON pv.patient_id = pac.patient_id
WHERE pac.total_appointments = 3
GROUP BY pv.visit_num
ORDER BY pv.visit_num;
Is there a way I can do this for everything at once?
A proof of concept idea:
"WITH patient_appointment_counts AS (
-- Get the total number of appointments for each patient
SELECT
patient_id,
COUNT(*) as total_appointments
FROM patient_visits
GROUP BY patient_id
),
patient_appointment_categories AS (
-- Categorize patients based on their total appointments
-- 1 = exactly 1 appointment
-- 2 = exactly 2 appointments
-- 3 = exactly 3 appointments
-- 999 = more than 3 appointments
SELECT
patient_id,
CASE
WHEN total_appointments = 1 THEN 1
WHEN total_appointments = 2 THEN 2
WHEN total_appointments = 3 THEN 3
WHEN total_appointments > 3 THEN 999
END AS appointment_category
FROM patient_appointment_counts
),
normalized_visits AS (
-- Normalize visit numbers, grouping visits beyond #3 for patients with >3 appointments
SELECT
pv.patient_id,
pv.visit_num,
pv.appointment_result,
pac.appointment_category,
CASE
WHEN pac.appointment_category = 999 AND pv.visit_num > 3 THEN 4 -- Group all appointments beyond 3
ELSE pv.visit_num
END AS normalized_visit_num
FROM patient_visits pv
JOIN patient_appointment_categories pac ON pv.patient_id = pac.patient_id
),
miss_counts AS (
-- Calculate miss counts and totals for each category and visit number
SELECT
appointment_category,
normalized_visit_num,
COUNT(*) AS total_count,
SUM(CASE WHEN appointment_result = 'no' THEN 1 ELSE 0 END) AS miss_count
FROM normalized_visits
GROUP BY appointment_category, normalized_visit_num
)
SELECT
CASE
WHEN appointment_category = 1 THEN 'Patients with exactly 1 appointment'
WHEN appointment_category = 2 THEN 'Patients with exactly 2 appointments'
WHEN appointment_category = 3 THEN 'Patients with exactly 3 appointments'
WHEN appointment_category = 999 THEN 'Patients with more than 3 appointments'
END AS patient_group,
CASE
WHEN normalized_visit_num = 1 THEN '1st appointment'
WHEN normalized_visit_num = 2 THEN '2nd appointment'
WHEN normalized_visit_num = 3 THEN '3rd appointment'
WHEN normalized_visit_num = 4 THEN '4th+ appointments (combined)'
END AS appointment_number,
total_count,
miss_count,
ROUND((miss_count * 100.0 / total_count), 2) AS miss_rate_percentage
FROM miss_counts
ORDER BY
CASE
WHEN appointment_category = 1 THEN 1
WHEN appointment_category = 2 THEN 2
WHEN appointment_category = 3 THEN 3
WHEN appointment_category = 999 THEN 4
END,
normalized_visit_num;
I have this table in SQL that contains info on multiple hospital visits for each patient:
CREATE TABLE patient_visits (
patient_id INT,
hospital_visit_date DATE,
visit_num INT,
appointment_result VARCHAR(3),
PRIMARY KEY (patient_id, visit_num)
);
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1001, '2023-01-15', 1, 'yes'),
(1001, '2023-02-20', 2, 'no'),
(1001, '2023-04-10', 3, 'yes'),
(1001, '2023-07-05', 4, 'yes'),
(1001, '2023-11-22', 5, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1002, '2023-03-08', 1, 'yes'),
(1002, '2023-06-12', 2, 'yes'),
(1002, '2023-10-30', 3, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1003, '2023-02-05', 1, 'yes'),
(1003, '2023-08-18', 2, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1004, '2023-01-10', 1, 'no'),
(1004, '2023-03-25', 2, 'yes'),
(1004, '2023-05-14', 3, 'yes'),
(1004, '2023-09-07', 4, 'no');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1005, '2023-07-20', 1, 'yes');
INSERT INTO patient_visits (patient_id, hospital_visit_date, visit_num, appointment_result) VALUES
(1006, '2023-01-05', 1, 'yes'),
(1006, '2023-02-15', 2, 'yes'),
(1006, '2023-05-22', 3, 'no'),
(1006, '2023-08-30', 4, 'yes'),
(1006, '2023-12-15', 5, 'yes');
When displayed, the table looks like this:
patient_id hospital_visit_date visit_num appointment_result
1001 2023-01-15 1 yes
1001 2023-02-20 2 no
1001 2023-04-10 3 yes
1001 2023-07-05 4 yes
1001 2023-11-22 5 no
1002 2023-03-08 1 yes
1002 2023-06-12 2 yes
1002 2023-10-30 3 no
1003 2023-02-05 1 yes
1003 2023-08-18 2 no
1004 2023-01-10 1 no
1004 2023-03-25 2 yes
1004 2023-05-14 3 yes
1004 2023-09-07 4 no
1005 2023-07-20 1 yes
1006 2023-01-05 1 yes
1006 2023-02-15 2 yes
1006 2023-05-22 3 no
1006 2023-08-30 4 yes
1006 2023-12-15 5 yes
Based on this table, I want to answer the following questions:
I want to answer the following question:
- For patients who have exactly 1 appointment: what was the miss rate on their first appointment?
- For patients who have exactly 2 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment?
- For patients who have exactly 3 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment? what was the miss rate on their third appointment?
- For patients who have more than 3 appointments: what was the miss rate on their first appointment? what was the miss rate on their second appointment? what was the miss rate on their third appointment? what was their miss rate on all other appointments (lumped into one)?
Currently, I am doing this manually, e.g.
WITH patient_appointment_counts AS (
SELECT patient_id, COUNT(*) as total_appointments
FROM patient_visits
GROUP BY patient_id
)
SELECT
pv.visit_num,
COUNT(CASE WHEN pv.appointment_result = 'no' THEN 1 END) * 100.0 / COUNT(*) as appointment_miss_rate
FROM patient_visits pv
JOIN patient_appointment_counts pac ON pv.patient_id = pac.patient_id
WHERE pac.total_appointments = 3
GROUP BY pv.visit_num
ORDER BY pv.visit_num;
Is there a way I can do this for everything at once?
A proof of concept idea:
"WITH patient_appointment_counts AS (
-- Get the total number of appointments for each patient
SELECT
patient_id,
COUNT(*) as total_appointments
FROM patient_visits
GROUP BY patient_id
),
patient_appointment_categories AS (
-- Categorize patients based on their total appointments
-- 1 = exactly 1 appointment
-- 2 = exactly 2 appointments
-- 3 = exactly 3 appointments
-- 999 = more than 3 appointments
SELECT
patient_id,
CASE
WHEN total_appointments = 1 THEN 1
WHEN total_appointments = 2 THEN 2
WHEN total_appointments = 3 THEN 3
WHEN total_appointments > 3 THEN 999
END AS appointment_category
FROM patient_appointment_counts
),
normalized_visits AS (
-- Normalize visit numbers, grouping visits beyond #3 for patients with >3 appointments
SELECT
pv.patient_id,
pv.visit_num,
pv.appointment_result,
pac.appointment_category,
CASE
WHEN pac.appointment_category = 999 AND pv.visit_num > 3 THEN 4 -- Group all appointments beyond 3
ELSE pv.visit_num
END AS normalized_visit_num
FROM patient_visits pv
JOIN patient_appointment_categories pac ON pv.patient_id = pac.patient_id
),
miss_counts AS (
-- Calculate miss counts and totals for each category and visit number
SELECT
appointment_category,
normalized_visit_num,
COUNT(*) AS total_count,
SUM(CASE WHEN appointment_result = 'no' THEN 1 ELSE 0 END) AS miss_count
FROM normalized_visits
GROUP BY appointment_category, normalized_visit_num
)
SELECT
CASE
WHEN appointment_category = 1 THEN 'Patients with exactly 1 appointment'
WHEN appointment_category = 2 THEN 'Patients with exactly 2 appointments'
WHEN appointment_category = 3 THEN 'Patients with exactly 3 appointments'
WHEN appointment_category = 999 THEN 'Patients with more than 3 appointments'
END AS patient_group,
CASE
WHEN normalized_visit_num = 1 THEN '1st appointment'
WHEN normalized_visit_num = 2 THEN '2nd appointment'
WHEN normalized_visit_num = 3 THEN '3rd appointment'
WHEN normalized_visit_num = 4 THEN '4th+ appointments (combined)'
END AS appointment_number,
total_count,
miss_count,
ROUND((miss_count * 100.0 / total_count), 2) AS miss_rate_percentage
FROM miss_counts
ORDER BY
CASE
WHEN appointment_category = 1 THEN 1
WHEN appointment_category = 2 THEN 2
WHEN appointment_category = 3 THEN 3
WHEN appointment_category = 999 THEN 4
END,
normalized_visit_num;
Share
Improve this question
edited 2 hours ago
user430997
asked 2 hours ago
user430997user430997
5211 silver badge11 bronze badges
4
- I don't understand: for any given appointment, the miss rate can only be 0% or 100%. Can you give expected results and explain them clearly? Do you mean the miss rate across all patients, but broken down by how many appointments they had? – Charlieface Commented 2 hours ago
- Is this the sort of result you are looking for? dbfiddle.uk/D5cYPGjq – Charlieface Commented 1 hour ago
- hi Charlieface, thank you for your answer! check out the sample query I added at the end of the question ... – user430997 Commented 1 hour ago
- Please provide the desired results are you expecting for the sample data. – Dale K Commented 1 hour ago
1 Answer
Reset to default 1You don't need to re-join for this. It can be done in a single scan of the base table, in a number of steps.
- Number the rows by date, per
patient_id
. - Group up by
patient_id
taking:- The count of appointments, capping at 4
- The number of misses for the 1st, 2nd, 3rd and other rows (
rn = 1
etc). We null out any of these where the count wasn't high enough. - The total number of other rows
- Group again, by that count of appointments. This gives us one row per
patient_group
, and would be the normal way to return this type of resultset.- Take the count of
patient_id
- The total number of misses for the 1st, 2nd, 3rd and other rows, for all
patient_id
s in the group. - The average of misses, by multiplying the above by 100 and dividing by the number of
patient_id
s. In the case of "other", we divide by the total.
- Take the count of
- Finally, to wrangle it into your desired format, unpivot it. There are various ways, the best is usually to laterally join the values.
WITH numbered AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY patient_id ORDER BY hospital_visit_date) AS rn
FROM patient_visits pv
),
by_patient AS (
SELECT
LEAST(COUNT(*), 4) AS total_appts_up_to_4,
COUNT(CASE WHEN appointment_result = 'no' AND rn = 1 THEN 1 END) AS no1,
CASE WHEN COUNT(*) >= 2 THEN COUNT(CASE WHEN appointment_result = 'no' AND rn = 2 THEN 1 END) END AS no2,
CASE WHEN COUNT(*) >= 3 THEN COUNT(CASE WHEN appointment_result = 'no' AND rn = 3 THEN 1 END) END AS no3,
CASE WHEN COUNT(*) >= 4 THEN COUNT(CASE WHEN appointment_result = 'no' AND rn > 3 THEN 1 END) END AS no_other,
CASE WHEN COUNT(*) >= 4 THEN COUNT(CASE WHEN rn > 3 THEN 1 END) END AS count_other
FROM numbered
GROUP BY
patient_id
),
by_total AS (
SELECT
total_appts_up_to_4,
COUNT(*) AS total_count,
SUM(no1) AS miss_1,
SUM(no2) AS miss_2,
SUM(no3) AS miss_3,
SUM(no_other) AS miss_other,
SUM(no1) * 100.0 / COUNT(*) AS miss_rate_1,
SUM(no2) * 100.0 / COUNT(*) AS miss_rate_2,
SUM(no3) * 100.0 / COUNT(*) AS miss_rate_3,
SUM(no_other) * 100.0 / NULLIF(SUM(count_other), 0) AS miss_rate_other
FROM by_patient
GROUP BY
total_appts_up_to_4
)
SELECT
CASE total_appts_up_to_4
WHEN 1 THEN 'Patients with exactly 1 appointment'
WHEN 2 THEN 'Patients with exactly 2 appointments'
WHEN 3 THEN 'Patients with exactly 3 appointments'
ELSE 'Patients with more than 3 appointments'
END AS patient_group,
v.appointment_number,
t.total_count,
v.miss_count,
v.miss_rate_percentage
FROM by_total t
LATERAL CROSS JOIN (VALUES
('1st appointment', t.miss_1, t.miss_rate_1),
('2nd appointment', t.miss_2, t.miss_rate_2),
('3rd appointment', t.miss_3, t.miss_rate_3),
('4th+ appointments (combined)', t.miss_other, t.miss_rate_other)
) AS v(appointment_number, miss_count, miss_rate_percentage)
WHERE v.miss_count IS NOT NULL;
db<>fiddle