Home 리텐션 - 다른 테이블의 속성으로 코호트 분석
Post
Cancel

리텐션 - 다른 테이블의 속성으로 코호트 분석

legislators 테이블

1
2
SELECT * 
FROM legislators


성별

앞서 진행한 JOIN 조건들과 동일하되, legislators 테이블을 추가로 JOIN 한다. 날짜와 state로 살펴본 것과 달리 성별에 따른 코호트 숫자를 알아본다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT
	D.gender,
	COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
	COUNT(DISTINCT A.id_bioguide) AS cohort_retained
FROM
(
	SELECT id_bioguide, MIN(term_start) AS first_term
	FROM legislators_terms
	GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
	LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
		AND C.month_name = 'December' AND C.day_of_month = 31
	JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
GROUP BY 1, 2
ORDER BY 1, 2;


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
SELECT
	gender,
	period,
	cohort_retained,
	FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC) AS cohort_size,
	ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC), 2) AS pct_retained
FROM
(
	SELECT
		D.gender,
		COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
		COUNT(DISTINCT A.id_bioguide) AS cohort_retained
	FROM
	(
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		GROUP BY id_bioguide
	) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
			AND C.month_name = 'December' AND C.day_of_month = 31
		JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
	GROUP BY 1, 2
) AS sub
ORDER BY 2, 1;


A 테이블의 first_term이 1917-01-01부터 1999-12-31까지인 경우만 코호트 숫자와 비율을 계산한다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
SELECT
	gender,
	period,
	cohort_retained,
	FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC) AS cohort_size,
	ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC), 2) AS pct_retained
FROM
(
	SELECT
		D.gender,
		COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
		COUNT(DISTINCT A.id_bioguide) AS cohort_retained
	FROM
	(
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		GROUP BY id_bioguide
	) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
			AND C.month_name = 'December' AND C.day_of_month = 31
		JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
	WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31'
	GROUP BY 1, 2
) AS sub
ORDER BY 2, 1;

This post is licensed under CC BY 4.0 by the author.