*************************************************************************************************************************
* This STATA DO file produces "cleandata_pap.dta", the dataset used for Besley and Kudamatsu (2006)
* "Health and Democracy", American Economic Review, 96(2), pp.313-318.
* For replication, make sure that the STATA data file "basesample_pap.dta"
* is saved under the directory specified by the cd command below.
* For any inquiry, email Masayuki Kudamatsu (address omitted in this copy of the file).
*
* Overview of the steps:
*   1. Restrict the health outcome variables to the sample years and build 5-year averages.
*   2. Fill gaps in POLITY2 for successor/predecessor states and build the democracy measures.
*   3. Build the 1900-1955 political history variables.
*   4. Build income, schooling, legal origin, region, year and country controls.
*   5. Flag the balanced subsample used for Column 2 of Table 2.
*   6. Drop helper variables and save.
*************************************************************************************************************************

version 8.0
clear
cd Z:\aea2006\ /* Change this where appropriate. */
capture log close
log using cleaning_pap.log, replace
set more off
set memory 100m

use "basesample_pap.dta", clear
/* This is the data file containing only variables obtained from the existing data sources.*/


*******************************
* 1. EDIT DEPENDENT VARIABLES *
*******************************

** 1.1 Life Expectancy at Birth **

* 1.1.1 Drop irrelevant years *
/* This is necessary as some countries (especially rich ones) tend to have yearly observations on life expectancy. */
gen smplyr1 = year==1962 | year==1967 | year==1972 | year==1977 | year==1982 | year==1987 | year==1992 | year==1997 | year==2002
replace lifeexp=. if smplyr1 == 0
label variable lifeexp "Life Expectancy at Birth (years)"
drop smplyr1

** 1.2 Infant Mortality **

* 1.2.1 Drop irrelevant years *
/* This is necessary as some countries (especially rich ones) tend to have yearly observations on infant mortality. */
gen smplyr2 = year==1960 | year==1970 | year==1980 | year==1990 | year==2000
replace infmort=. if smplyr2 == 0
label variable infmort "Infant Mortality (per 1000 live births)"
drop smplyr2

** 1.3 Sanitation **
label variable sanitation "Access to Improved Sanitation Facilities (% of population)"

** 1.4 Clean Water **
label variable water "Access to Improved Water Sources (% of population)"

** 1.5 DPT Immunization **

* 1.5.1 Drop irrelevant years *
replace imm_dpt=. if year==1980 | year>=2001

* 1.5.2 Obtain 5-year average *
* period4 indexes the four 5-year windows 1981-85, 1986-90, 1991-95, 1996-2000 (0 = outside all windows).
gen period4 = 0
replace period4 = 1 if year>=1981 & year<=1985
replace period4 = 2 if year>=1986 & year<=1990
replace period4 = 3 if year>=1991 & year<=1995
replace period4 = 4 if year>=1996 & year<=2000
egen meanimm_dpt = mean(imm_dpt) if period4~=0, by(period4 ccode)
label variable meanimm_dpt "DPT Immunization (% of children aged 12-23 months, 5-year average)"

** 1.6 Measles Immunization **

* 1.6.1 Drop irrelevant years *
replace imm_msl=. if year==1980 | year>=2001

* 1.6.2 Obtain 5-year average *
egen meanimm_msl = mean(imm_msl) if period4~=0, by(period4 ccode)
label variable meanimm_msl "Measles Immunization (% of children aged 12-23 months, 5-year average)"
drop period4

** 1.7 Government Health Expenditure Per Capita **

* 1.7.1 Drop irrelevant years *
replace hexpgovppp=. if year==1995 | year>=2001

* 1.7.2 Obtain GDP deflator from PWT6.1 *
gen gdpdef = cgdp/rgdpch /* 1 for 1996 */

* 1.7.3 Convert health expenditure into real terms *
gen realgovhexp = hexpgovppp/gdpdef

* 1.7.4 Calculate 1996-2000 average real government health expenditure per capita *
egen meangovhexp = mean(realgovhexp) if year>=1996 & year<=2000, by(ccode)
label variable meangovhexp "Government Health Expenditure Per Capita (constant 1996 international dollars, 5-year average)"


********************************
* 2. EDIT DEMOCRACY VARIABLES  *
********************************

*** 2.1 Adjust POLITY2 variable ***
* Pattern used in 2.1.3-2.1.9: copy the donor country's POLITY2 into a temp variable,
* spread it to every observation of the same year with egen max(), then fill only
* the recipient countries' missing values.

** 2.1.1 Burundi's POLITY2 is missing for 2002 and 2003 due to regime transition ongoing **
replace polity2=0 if code=="BDI" & (year==2002 | year==2003)

** 2.1.2 Peru's POLITY2 is missing for 2000 due to regime transition **
replace polity2=5 if code=="PER" & year==2000 /* POLITY2 for Peru is 1 in 1999 and 9 in 2001. */

** 2.1.3 Replace POLITY2 values for former Soviet republics with one for Soviet Union **
gen temp1 = polity2 if code=="SUN"
egen polity2sun = max(temp1), by(year)
replace polity2=polity2sun if polity2==. & ///
    (code=="ARM" | code=="AZE" | code=="BLR" | code=="EST" | code=="GEO" | ///
     code=="KAZ" | code=="KGZ" | code=="LTU" | code=="LVA" | code=="MDA" | ///
     code=="RUS" | code=="TJK" | code=="TKM" | code=="UKR" | code=="UZB")
/* "if polity2==." ensures that polity2 values for Baltic states in the 1920s-30s are unchanged. */

** 2.1.4 Replace POLITY2 values for Czech and Slovak Republics with one for Czechoslovakia **
gen temp2 = polity2 if code=="CSK"
egen polity2csk = max(temp2), by(year)
replace polity2=polity2csk if polity2==. & (code=="SVK" | code=="CZE")

** 2.1.5 Replace POLITY2 values for former Yugoslav republics with one for Yugoslavia **
gen temp3 = polity2 if code=="YUG"
egen polity2yug = max(temp3), by(year)
replace polity2=polity2yug if polity2==. & (code=="MKD" | code=="SVN" | code=="HRV" | code=="YGS" | code=="BIH")

** 2.1.6 Replace POLITY2 values for Eritrea with one for Ethiopia since 1952 **
/* Eritrea should be seen as colony until 1951; it was a colony of Italy for 1880-1941, then of Britain for 1941-5, UN protectorate for 1945-1952, and annexed to Ethiopia in 1952. See http://www.open.org/~tfl/eritrea/history.html */
gen temp4 = polity2 if code=="ETH"
egen polity2eth = max(temp4), by(year)
replace polity2=polity2eth if polity2==. & code=="ERI" & year>=1952

** 2.1.7 Replace POLITY2 values for Bangladesh with one for Pakistan until independence **
gen temp7 = polity2 if code=="PAK"
egen polity2pak = max(temp7), by(year)
replace polity2=polity2pak if polity2==. & code=="BGD"

** 2.1.8 Replace POLITY2 values for Vietnam with one for North Vietnam until unification **
/* Whether North or South Vietnam is appropriate is irrelevant as both were non-democratic. */
gen temp8 = polity2 if ccode==816
egen polity2drv = max(temp8), by(year)
replace polity2=polity2drv if polity2==. & code=="VNM"

** 2.1.9 Replace POLITY2 values for North/South Korea with one for Korea until 1910 **
gen temp5 = polity2 if ccode==730
egen polity2korea = max(temp5), by(year)
replace polity2=polity2korea if polity2==. & (code=="KOR" | code=="PRK")

drop temp* polity2csk polity2sun polity2yug polity2eth polity2korea polity2pak polity2drv

*** 2.2 Democracy Dummy ***
gen democracy = polity2>0 if polity2~=. /* Following Persson (2005) and Persson and Tabellini (2005) */

*** 2.3 Construct DEMOCRACY since t-4 ***
* For each year t in 1960-2002: number of democratic years in [t-4, t] divided by 5.
gen democstm4 = 0 if year>=1960 & year<=2002
label variable democstm4 "DEMOCRACY since t-4"
forvalues i = 1960/2002 {
    egen numdemoc`i'=sum(democracy) if year>=`i'-4 & year<=`i', by(ccode)
    gen meandemoc`i'=numdemoc`i'/5
    replace democstm4 = meandemoc`i' if year==`i'
    drop numdemoc`i' meandemoc`i'
}
replace democstm4=. if polity2==.

*** 2.4 Construct DEMOCRACY since 1956 ***

** 2.4.1 Cumulative number of democratic years since 1956 **
gen numdemocpost56 = 0 if year>=1956 & year<=2002
sort ccode year
* NOTE(review): the replace below runs through 2003 while the gen above stops at 2002,
* so democs56 is also defined for 2003. Kept exactly as in the original so that the
* output dataset is unchanged -- confirm whether 2002 was intended.
by ccode: replace numdemocpost56 = sum(democracy) if year>=1956 & year<=2003

** 2.4.2 Fraction of democratic years since 1956 **
gen tempyear = year-1955 /* Number of years elapsed since 1956, counting the current year. */
gen democs56 = numdemocpost56/tempyear
label variable democs56 "DEMOCRACY since 1956"
drop numdemocpost56 tempyear

** 2.4.3 Drop Germany **
replace democs56=. if code=="GER"

** 2.4.4 5-year lag for columns 3-5 of table 3 **
* The lag is taken by observation position within country (5 rows back),
* which equals a 5-year lag only if each country has one row per consecutive year
* -- presumably the case for this panel; verify against basesample_pap.dta.
sort ccode year
by ccode: gen ldemocs56 = democs56[_n-5]
label variable ldemocs56 "DEMOCRACY since 1956"


*******************************************
* 3. GENERATE POLITICAL HISTORY VARIABLES *
*******************************************

** 3.1 DEMOCRACY for 1900-1955 **
* Share of the 56 years 1900-1955 that were democratic, spread to all years of the country.
egen numdemocpre55 = sum(democracy) if year>=1900 & year<=1955 & politysample==1, by(ccode)
gen temp1 = numdemocpre55/56
egen democf0055 = max(temp1), by(ccode)
label variable democf0055 "DEMOCRACY for 1900-1955"
drop temp1 numdemocpre55

** 3.2 COLONY for 1900-1955 **
gen colony = polity2==. if politysample==1
/* POLITYSAMPLE is a dummy variable equal to 1 if a country is included in the POLITY IV dataset for at least one year. */
egen numcolonypre55 = sum(colony) if year>=1900 & year<=1955 & politysample==1, by(ccode)
gen temp2 = numcolonypre55/56
egen colonyf0055 = max(temp2), by(ccode)
drop temp2 numcolonypre55
label variable colonyf0055 "COLONY for 1900-1955"


*******************************************
* 4. EDIT OTHER CONTROL VARIABLES         *
*******************************************

*** 4.1.1 INCOME ***
* For each year t in 1960-2002: mean of real GDP per capita (in thousands) over [t-4, t].
gen rgdpch1000 = rgdpch/1000
gen income = 0 if year>=1960 & year<=2002
label variable income "INCOME"
forvalues i = 1960/2002 {
    egen meanincome`i'=mean(rgdpch1000) if year>=`i'-4 & year<=`i', by(ccode)
    replace income=meanincome`i' if year==`i'
    drop meanincome`i'
}

*** 4.1.2 INCOME squared ***
gen income2 = income^2
label variable income2 "INCOME squared"

*** 4.2 SCHOOLING in 1960 ***
gen tempedu15 = tyr15 if year==1960
egen schooling60 = max(tempedu15), by(ccode)
drop tempedu15
label variable schooling60 "SCHOOLING in 1960"

*** 4.3 Legal Origin Dummies ***

** 4.3.1 Deal with East Timor **
* Missing legal origin for East Timor (TMP) is coded as French.
foreach v in legor_uk legor_so legor_ge legor_sc {
    recode `v' .=0 if code=="TMP"
}
recode legor_fr .=1 if code=="TMP"

*** 4.4 Region dummies ***
* Each dummy starts at 0 and is switched to 1 for the listed country codes.

** 4.4.1 East Asia and Pacific (omitted in the analysis) **
gen eap = 0
foreach c in AUS BRN BUR CHN FJI FSM IDN JPN KHM KIR ///
             KOR LAO MHL MNG MYS NZL PHL PLW PNG PRK ///
             SGP SLB THA TMP TON TWN VNM VUT WSM {
    replace eap=1 if code=="`c'"
}
label variable eap "East Asia and Pacific"

** 4.4.2 Europe and Central Asia **
gen eca = 0
foreach c in ALB ARM AZE BGR BIH BLR CZE EST GEO HRV ///
             HUN KAZ KGZ LTU LVA MDA MKD POL ROM RUS ///
             SVK SVN TJK TKM TUR UKR UZB YGS {
    replace eca = 1 if code=="`c'"
}
label variable eca "(Eastern) Europe and Central Asia"

** 4.4.3 Latin America and the Caribbean **
gen lac = 0
foreach c in ARG ATG BHS BLZ BOL BRA BRB CHL COL CRI ///
             CUB DMA DOM ECU GRD GTM GUY HND HTI JAM ///
             KNA LCA MEX NIC PAN PER PRY SLV SUR TTO ///
             URY VCT VEN {
    replace lac = 1 if code=="`c'"
}
label variable lac "Latin America and the Caribbean"

** 4.4.4 Middle East and North Africa **
gen mena = 0
foreach c in ARE BHR DJI DZA EGY IRN IRQ ISR JOR KWT ///
             LBN LBY MAR MLT OMN QAT SAU SYR TUN YEM {
    replace mena = 1 if code=="`c'"
}
label variable mena "Middle East and North Africa"

** 4.4.5 South Asia **
gen sa = 0
foreach c in AFG BGD BTN IND LKA MDV NPL PAK {
    replace sa = 1 if code=="`c'"
}
* Label added for consistency with the other region dummies.
label variable sa "South Asia"

** 4.4.6 Sub-Saharan Africa **
gen ssa = 0
foreach c in AGO BDI BEN BFA BWA CAF CIV CMR COG COM ///
             CPV ERI ETH GAB GHA GIN GMB GNB GNQ KEN ///
             LBR LSO MDG MLI MOZ MRT MUS MWI NAM NER ///
             NGA RWA SDN SEN SLE SOM STP SWZ SYC TCD ///
             TGO TZA UGA ZAF ZAR ZMB ZWE {
    replace ssa = 1 if code=="`c'"
}
label variable ssa "Sub-Saharan Africa"

** 4.4.7 Western Europe and North America **
gen western = 0
foreach c in ADO AUT BEL CAN CHE CYP DNK ESP FIN FRA ///
             GBR GER GRC IRL ISL ITA LIE LUX MCO NLD ///
             NOR PRT SWE USA {
    replace western = 1 if code=="`c'"
}
label variable western "Western Europe and North America"

*** 4.5 Year Dummies ***
qui tab year if year>=1901 & year<=2002, gen(ydum)
/* YDUMxx is a dummy for year 1900+xx, where xx is 1 to 102. */

*** 4.6 Country Dummies ***
qui tab code, gen(cdum)


******************************************************************
* 5. Create the subsample indicator for Column 2 of Table 2
******************************************************************
sort ccode year
by ccode: gen lagdemocracy = democracy[_n-1] if year>=1956 & year<=2002
by ccode: gen transition = democracy - lagdemocracy if year>=1956 & year<=2002
/* TRANSITION is 1 if democratized, -1 if autocratized. */

egen mintransition = min(transition) if year>=1956 & year<=2002, by(ccode)
gen autocratized = mintransition == -1 if year>=1956 & year<=2002
/* AUTOCRATIZED is 1 if autocratized at least once */

* Keep only the democratization events (transition == 1) so that count() below counts them.
replace transition=. if (transition == 0 | transition == -1) & year>=1956 & year<=2002
egen numdemocratize = count(transition) if year>=1956 & year<=2002, by(ccode)
/* NUMDEMOCRATIZE: The number of democratization during 1956-2002 */

* Subsample: countries that democratized exactly once and never autocratized in 1956-2002.
gen subsample = numdemocratize == 1 & autocratized == 0 & year>=1956 & year<=2002
egen count = count(lifeexp) if subsample==1 & income~=., by(ccode)
gen balanced = subsample == 1 & count == 9
/* By restricting the sample to countries with COUNT equal to 9, we can make the balanced sample. */
label variable balanced "Dummy for countries in the sample for Column (2) of Table 2"
drop lagdemocracy transition mintransition autocratized numdemocratize count


*******************************************************
* 6. Drop variables not used for econometric analysis *
*******************************************************
drop politysample subsample tyr15
compress
save "cleandata_pap.dta", replace
log close