ID별 첫 번째 임기 시작 날짜와 state
1
2
3
4
5
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start) AS first_state
FROM legislators_terms;
state, gender, period별 코호트 숫자
- 위 쿼리를 서브쿼리로 사용한다.
- 이전 게시글에서와 마찬가지로 여러 테이블을 JOIN 한다. (상세한 JOIN 조건들에 대해서는 이전 글들을 참고)
- A 테이블에서 계산한 first_term이 1917-01-01부터 1999-12-31까지에 해당하는 테이블만을 조회한다.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
SELECT A.first_state, D.gender, COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period, COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained FROM ( SELECT DISTINCT(id_bioguide), MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term, FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start) AS first_state FROM legislators_terms ) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end AND C.month_name = 'December' AND C.day_of_month = 31 JOIN legislators AS D ON A.id_bioguide = D.id_bioguide WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31' GROUP BY 1, 2, 3;
state, gender, period별 코호트 숫자와 비율
- 위 테이블을 서브쿼리로 사용한다.
- state, gender별로 period를 오름차순 정렬했을 때 처음 나타나는 코호트 숫자와 비율을 계산한다.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
SELECT first_state, gender, period, cohort_retained, FIRST_VALUE(cohort_retained) OVER(PARTITION BY first_state, gender ORDER BY period) AS cohort_size, ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY first_state, gender ORDER BY period), 2) AS pct_retained FROM ( SELECT A.first_state, D.gender, COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period, COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained FROM ( SELECT DISTINCT(id_bioguide), MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term, FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start) AS first_state FROM legislators_terms ) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end AND C.month_name = 'December' AND C.day_of_month = 31 JOIN legislators AS D ON A.id_bioguide = D.id_bioguide WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31' GROUP BY 1, 2, 3 ) AS sub;
gender, state별 코호트 숫자
1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
B.gender,
A.first_state,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_size
FROM
(
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start) AS first_state
FROM legislators_terms
) AS A JOIN legislators AS B ON A.id_bioguide = B.id_bioguide
WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31'
GROUP BY 1, 2;
이전 테이블과 period JOIN
- 해당 테이블을 T1이라고 가정하자.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
SELECT B.gender, B.first_state, C.period, B.cohort_size FROM ( SELECT B.gender, A.first_state, COUNT(DISTINCT(A.id_bioguide)) AS cohort_size FROM ( SELECT DISTINCT(id_bioguide), MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term, FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state FROM legislators_terms ) AS A JOIN legislators AS B ON A.id_bioguide = B.id_bioguide WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31' GROUP BY 1, 2 ) AS B JOIN (SELECT generate_series AS period FROM generate_series(0, 20, 1)) AS C ON 1 = 1;
state, gender, period별 코호트 숫자
- 해당 테이블은 이전 게시글에서 다뤘던 내용과 같은 쿼리이다.
- 해당 테이블을 T2라고 가정하자.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
SELECT D.first_state, G.gender, COALESCE(DATE_PART('year', AGE(F.date, D.first_term)), 0) AS period, COUNT(DISTINCT(D.id_bioguide)) AS cohort_retained FROM ( SELECT DISTINCT(id_bioguide), MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term, FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state FROM legislators_terms ) AS D JOIN legislators_terms AS E ON D.id_bioguide = E.id_bioguide LEFT JOIN date_dim AS F ON F.date BETWEEN E.term_start AND E.term_end AND F.month_name = 'December' AND F.day_of_month = 31 JOIN legislators AS G ON D.id_bioguide = G.id_bioguide WHERE D.first_term BETWEEN '1917-01-01' AND '1999-12-31' GROUP BY 1, 2, 3;
위 두 테이블 T1과 T2 JOIN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
SELECT
aa.gender,
aa.first_state,
aa.period,
aa.cohort_size,
COALESCE(bb.cohort_retained, 0) AS cohort_retained,
ROUND(COALESCE(bb.cohort_retained, 0) * 1.0 / aa.cohort_size, 2) AS pct_retained
FROM
(
SELECT B.gender, B.first_state, C.period, B.cohort_size
FROM
(
SELECT B.gender, A.first_state, COUNT(DISTINCT(A.id_bioguide)) AS cohort_size
FROM
(
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state
FROM legislators_terms
) AS A JOIN legislators AS B ON A.id_bioguide = B.id_bioguide
WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31'
GROUP BY 1, 2
) AS B JOIN (SELECT generate_series AS period
FROM generate_series(0, 20, 1)) AS C ON 1 = 1
) AS aa
LEFT JOIN
(
SELECT
D.first_state,
G.gender,
COALESCE(DATE_PART('year', AGE(F.date, D.first_term)), 0) AS period,
COUNT(DISTINCT(D.id_bioguide)) AS cohort_retained
FROM
(
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state
FROM legislators_terms
) AS D JOIN legislators_terms AS E ON D.id_bioguide = E.id_bioguide
LEFT JOIN date_dim AS F ON F.date BETWEEN E.term_start AND E.term_end
AND F.month_name = 'December' AND F.day_of_month = 31
JOIN legislators AS G ON D.id_bioguide = G.id_bioguide
WHERE D.first_term BETWEEN '1917-01-01' AND '1999-12-31'
GROUP BY 1, 2, 3
) AS bb ON aa.gender = bb.gender AND aa.first_state = bb.first_state AND aa.period = bb.period
ORDER BY 1, 2, 3;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
SELECT
gender, first_state, cohort_size,
MAX(CASE WHEN period=0 THEN pct_retained END) AS yr0,
MAX(CASE WHEN period=2 THEN pct_retained END) AS yr2,
MAX(CASE WHEN period=4 THEN pct_retained END) AS yr4,
MAX(CASE WHEN period=6 THEN pct_retained END) AS yr6,
MAX(CASE WHEN period=8 THEN pct_retained END) AS yr8,
MAX(CASE WHEN period=10 THEN pct_retained END) AS yr10
FROM
(
SELECT
aa.gender,
aa.first_state,
aa.period,
aa.cohort_size,
COALESCE(bb.cohort_retained, 0) AS cohort_retained,
ROUND(COALESCE(bb.cohort_retained, 0) * 1.0 / aa.cohort_size, 2) AS pct_retained
FROM
(
SELECT B.gender, B.first_state, C.period, B.cohort_size
FROM
(
SELECT B.gender, A.first_state, COUNT(DISTINCT(A.id_bioguide)) AS cohort_size
FROM
(
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state
FROM legislators_terms
) AS A JOIN legislators AS B ON A.id_bioguide = B.id_bioguide
WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31'
GROUP BY 1, 2
) AS B JOIN (SELECT generate_series AS period
FROM generate_series(0, 20, 1)) AS C ON 1 = 1
) AS aa
LEFT JOIN
(
SELECT
D.first_state,
G.gender,
COALESCE(DATE_PART('year', AGE(F.date, D.first_term)), 0) AS period,
COUNT(DISTINCT(D.id_bioguide)) AS cohort_retained
FROM
(
SELECT
DISTINCT(id_bioguide),
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
FIRST_VALUE(state) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_state
FROM legislators_terms
) AS D JOIN legislators_terms AS E ON D.id_bioguide = E.id_bioguide
LEFT JOIN date_dim AS F ON F.date BETWEEN E.term_start AND E.term_end
AND F.month_name = 'December' AND F.day_of_month = 31
JOIN legislators AS G ON D.id_bioguide = G.id_bioguide
WHERE D.first_term BETWEEN '1917-01-01' AND '1999-12-31'
GROUP BY 1, 2, 3
) AS bb ON aa.gender = bb.gender AND aa.first_state = bb.first_state AND aa.period = bb.period
) AS result
GROUP BY 1, 2, 3;