Home 관련 코호트 분석 - 리턴십(반복 구매 행동)
Post
Cancel

관련 코호트 분석 - 리턴십(반복 구매 행동)

term type이 rep인 id별 first term 날짜

1
2
3
4
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
WHERE term_type = 'rep'
GROUP BY 1;


century별 몇 명인지?

  • 위 쿼리를 서브쿼리로 사용한다.
  • century별 id를 COUNT 한다.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    
    SELECT
      DATE_PART('century', first_term) AS cohort_century,
      COUNT(id_bioguide) AS reps
    FROM (
      SELECT id_bioguide, MIN(term_start) AS first_term
      FROM legislators_terms
      WHERE term_type = 'rep'
      GROUP BY 1
    ) AS A  -- rep인 경우의 first_term
    GROUP BY 1
    ORDER BY 1;
    


임기 타입이 rep와 sen인 경우 century별 몇 명인지?

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
	DATE_PART('century', A.first_term) AS cohort_century,
	COUNT(DISTINCT A.id_bioguide) AS rep_and_sen
FROM (
	SELECT id_bioguide, MIN(term_start) AS first_term
	FROM legislators_terms
	WHERE term_type = 'rep'
	GROUP BY 1
) AS A  -- rep인 경우의 first_term
	JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		AND B.term_type = 'sen' AND B.term_start > A.first_term
GROUP BY 1;


century별 rep와 rep & sen의 수와 비율

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
SELECT
	A2.cohort_century,
	A2.reps,
	B2.rep_and_sen,
	ROUND(B2.rep_and_sen * 1.0 / A2.reps, 3) AS pct_rep_and_sen
FROM (
	SELECT
		DATE_PART('century', first_term) AS cohort_century,
		COUNT(id_bioguide) AS reps
	FROM (
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		WHERE term_type = 'rep'
		GROUP BY 1
	) AS A  -- rep인 경우의 first_term
	GROUP BY 1
) AS A2
LEFT JOIN (
	SELECT
		DATE_PART('century', A.first_term) AS cohort_century,
		COUNT(DISTINCT A.id_bioguide) AS rep_and_sen
	FROM (
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		WHERE term_type = 'rep'
		GROUP BY 1
	) AS A  -- rep인 경우의 first_term
	JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		AND B.term_type = 'sen' AND B.term_start > A.first_term
	GROUP BY 1
) AS B2 ON A2.cohort_century = B2.cohort_century;  -- rep & sen


century별 rep and sen의 비율

[A2 테이블]

  • rep인 경우의 id별 first_term
  • century별 그루핑


[B2 테이블]

  • rep인 경우의 id별 first_term
  • legislators_terms와 JOIN
  • id, term type = ‘sen’, rep의 first term보다 sen의 term start가 큰 경우
  • sen의 term start - rep의 first term이 10년 이하인 경우
  • century별 그루핑
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
SELECT
	A2.cohort_century,
	A2.reps,
	B2.rep_and_sen,
	ROUND(B2.rep_and_sen * 1.0 / A2.reps, 4) AS pct_rep_and_sen
FROM (
	SELECT
		DATE_PART('century', A1.first_term) AS cohort_century,
		COUNT(id_bioguide) AS reps
	FROM (
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		WHERE term_type = 'rep'
		GROUP BY 1
	) AS A1  -- id별 rep인 경우의 first_term
	WHERE first_term <= '2009-12-31'
	GROUP BY 1
) AS A2 -- century별 rep인 사람의 수 
LEFT JOIN (
	SELECT
		DATE_PART('century', B1.first_term) AS cohort_century,
		COUNT(DISTINCT B1.id_bioguide) AS rep_and_sen
	FROM (
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		WHERE term_type = 'rep'
		GROUP BY 1
	) AS B1 -- id별 rep인 경우의 first_term
	JOIN legislators_terms AS C ON B1.id_bioguide = C.id_bioguide
		AND C.term_type = 'sen' AND C.term_start > B1.first_term
	WHERE AGE(C.term_start, B1.first_term) <= INTERVAL '10 years'
	GROUP BY 1
) AS B2 ON A2.cohort_century = B2.cohort_century;


century별 rep and sen의 5년, 10년, 15년 비율

  • sen의 term start - rep의 first term이 5년 이하인 경우
  • sen의 term start - rep의 first term이 10년 이하인 경우
  • sen의 term start - rep의 first term이 15년 이하인 경우
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    
    SELECT
      A2.cohort_century,
      ROUND(B2.rep_and_sen_5_yrs * 1.0 / A2.reps, 4) AS pct_5_yrs,
      ROUND(B2.rep_and_sen_10_yrs * 1.0 / A2.reps, 4) AS pct_10_yrs,
      ROUND(B2.rep_and_sen_15_yrs * 1.0 / A2.reps, 4) AS pct_15_yrs
    FROM (
      SELECT
          DATE_PART('century', A1.first_term) AS cohort_century,
          COUNT(id_bioguide) AS reps
      FROM (
          SELECT id_bioguide, MIN(term_start) AS first_term
          FROM legislators_terms
          WHERE term_type = 'rep'
          GROUP BY 1
      ) AS A1  -- id별 rep인 경우의 first_term
      WHERE first_term <= '2009-12-31'
      GROUP BY 1
    ) AS A2 -- century별 rep인 사람의 수 
    LEFT JOIN (
      SELECT
          DATE_PART('century', B1.first_term) AS cohort_century,
          COUNT(DISTINCT CASE WHEN AGE(C.term_start, B1.first_term) <= INTERVAL '5 years'
                              THEN B1.id_bioguide END) AS rep_and_sen_5_yrs,
          COUNT(DISTINCT CASE WHEN AGE(C.term_start, B1.first_term) <= INTERVAL '10 years'
                              THEN B1.id_bioguide END) AS rep_and_sen_10_yrs,
      COUNT(DISTINCT CASE WHEN AGE(C.term_start, B1.first_term) <= INTERVAL '15 years'
                              THEN B1.id_bioguide END) AS rep_and_sen_15_yrs
      FROM (
          SELECT id_bioguide, MIN(term_start) AS first_term
          FROM legislators_terms
          WHERE term_type = 'rep'
          GROUP BY 1
      ) AS B1 -- id별 rep인 경우의 first_term
      JOIN legislators_terms AS C ON B1.id_bioguide = C.id_bioguide
          AND C.term_type = 'sen' AND C.term_start > B1.first_term
      GROUP BY 1
    ) AS B2 ON A2.cohort_century = B2.cohort_century;
    

This post is licensed under CC BY 4.0 by the author.