*** INTRODUCTION TO STATA
*
*  		Content
*
*  1) GETTING STARTED
*
*  1.1 Inputting data into Stata, global macros
*  1.2 Saving output
*  1.3 Inspection of data (table, tabulate, sum etc), merging data
*  1.4 Labeling variables and observations
*
*  2) OPERATIONS WITH DATA
*
*  2.1 Generating variables
*  2.2 Creation of dummy variables
*  2.3 Collapse - making a dataset of means, sums, medians, etc
*  2.4 Generating random variables
*  2.5 Generating string variables
*  2.6 Simple regressions
*  2.7 Displaying regression results
*  2.8 Hypotheses testing
*  2.9 Calculation of predicted values
*  2.10 Working with time series and panel data, lag/difference operators
*
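*  3) WORKING WITH SCALARS AND ARRAYS OF DATA
*
*  3.1 Scalar creation and operations
*  3.2 Matrix / vector creation and operations
*  3.3 Matrix functions returning scalars
*  3.4 Retrieving values into scalars and matrices from outputs
*
*  4) EXAMPLES OF REGRESSIONS
*
*  4.1 Measurement error bias: impact of error in right-hand side variables
*  4.2 Effect of multicollinearity
*  4.3 Fixed / random effect model
*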




*  -----------------------------------------------------------
*  1) GETTING STARTED
*  -----------------------------------------------------------
*  1.1 INPUTTING DATA INTO STATA, GLOBAL MACROS
more

* Run the rest of this do-file under Stata 6.0 command syntax
version 6.0

* Start from an empty session: data, variables and matrices.
* "capture" lets the do-file continue if "clear _all" is rejected.
capture clear _all
drop _all
matrix drop _all

* The global macro path1 holds the folder that contains the data files
global path1 "d:\labor"

* Load the first monthly file, replacing whatever is in memory
use "$path1\1_u_981.dta", clear

more
* Basic descriptive statistics for every variable
summarize

*  -----------------------------------------------------------
*  1.2 SAVING OUTPUT
more

* Open a log file named do1 in the data folder; "replace" overwrites
* a file of that name if it already exists
log using "$path1\do1", replace
* "log off" suspends logging and "log on" resumes it;
* "log close" stops logging and closes the log file
more

***  -----------------------------------------------------------
***  1.3 INSPECTION OF DATA, MERGING DATA
more

* - INSPECTION OF DATA

more
* Print the values of the selected variables (all variables if none are named)
list dcode dname
more
* Basic descriptive statistics for the selected variable(s)
summarize date
more
* Basic descriptive statistics for all variables
summarize
more
* Detailed statistics for one variable
summarize u_wh, detail
more
* Quick inspection of one variable
inspect u_wh
more
* Sort all observations by district code
sort dcode
more
* One-way table of frequencies of "date"
tabulate date
more
* Table of statistics classified by "date"
table date
more
* Table of u_tot statistics by "date" (intended: the means of u_tot)
table date, contents(m u_tot)
more


* - command APPEND:
* stacks the observations of another file under the variables already
* in memory, i.e. it lengthens the time-series dimension

more
* Append months 2, 3 and 4 of 1998 one at a time (files 1_u_982,
* 1_u_983 and 1_u_984); after each append show the frequencies of
* "date" and the summary statistics, pausing between steps
local m = 2
while `m' <= 4 {
	append using "$path1\1_u_98`m'.dta"
	more
	tabulate date
	more
	summarize
	more
	local m = `m' + 1
}

* - command MERGE:
* brings the variables of another file into the dataset in memory,
* i.e. it widens the cross-section dimension

more
* Add the vacancy data.
* NOTE(review): no key variables are listed, so observations are paired
* by their position in the two files - confirm both files share the
* same observation order.
merge using "$path1\1_v_981-984.dta"
more
tabulate date
more
summarize
more
* merge creates the variable _merge:
* _merge = 1 if the resulting obs. was present in the 1st dataset only
* _merge = 2 if the resulting obs. was present in the 2nd dataset only
* _merge = 3 if the resulting obs. was present in both datasets
more


*  -----------------------------------------------------------
*  1.4 LABELING VARIABLES AND OBSERVATIONS
more

* - LABELING VARIABLES
more

* assigning a label to the variable "dname"
label var dname "district name"
more

* viewing label and format of "dname"
describe dname
more

* - LABELING OBSERVATIONS
more

* Step 1: define a value label (a code-to-text mapping) named "date"
* in which the code 9801 is shown as "the first month of 1998"
label define date 9801 "the first month of 1998"
* Step 2: attach that value label to the variable "date"; without this
* step the mapping exists but is never applied to any variable
label values date date
more

* to view the contents of the value label "date"
label list date
more

*  -----------------------------------------------------------
*  2) OPERATIONS WITH DATA
*  -----------------------------------------------------------
*  2.1 GENERATING VARIABLES
more

*  Unemployment rate: u_tot as a percentage of lf
generate ur_tot = 100*u_tot/lf
*  Vacancy rate: v_tot as a percentage of lf
generate vr_tot = 100*v_tot/lf
more

*  -----------------------------------------------------------
*  2.2 CREATION OF DUMMY VARIABLES
more

* Build the dummy "d" with the egen function diff() applied to
* u_blu and u_wh
egen d = diff(u_blu u_wh)
more
* Frequencies of the new dummy
tabulate d
more

*  -----------------------------------------------------------
*  2.3 COLLAPSE - MAKING A DATASET OF MEANS, SUMS, MEDIANS, ETC
more

* Keep a copy of the data so it can be brought back afterwards
preserve
* Replace the data in memory by a dataset of summary statistics
* (the mean in this example) computed separately for each "date"
collapse (mean) u_tot u_dis u_blu u_wh, by(date)
more
* View the new dataset
list
more
* Bring back the original data
restore
more

*  -----------------------------------------------------------
*  2.4 GENERATING RANDOM VARIABLES
more

* "eu": uniformly distributed pseudo-random numbers on [0,1)
generate eu = uniform()
* "en": standard normal N(0,1) draws, obtained by passing uniform
* draws through the inverse normal distribution function
generate en = invnorm(uniform())
more
* Display statistics for both variables
summarize eu en
* Remove the variable eu ("en" is kept)
drop eu
more

*  -----------------------------------------------------------
*  2.5 GENERATING STRING VARIABLES
more

* Create the string variable "s1" with storage type str25
* (up to 25 characters)
generate str25 s1 = "This is a string"
more
* Show "s1" for the first 5 observations
list s1 if _n <= 5
more
* Delete "s1"
drop s1
more

*  -----------------------------------------------------------
*  2.6 SIMPLE REGRESSIONS
more

* OLS regression with a constant term
regress ur_tot vr_tot
more
* OLS regression without a constant
regress ur_tot vr_tot, noconstant
more
* OLS with heteroscedasticity-robust standard errors
regress ur_tot vr_tot, robust
more
* OLS restricted to the observations with date equal to 9801
regress ur_tot vr_tot if date == 9801
more

*  -----------------------------------------------------------
*  2.7 DISPLAYING REGRESSION RESULTS
more
* Show all results saved by the last command
disp_res
* Show only the first saved result
display _result(1)
more

*  -----------------------------------------------------------
*  2.8 HYPOTHESES TESTING
more
* All tests refer to the most recently estimated regression
* H0: CONST = 0
test _cons = 0
* H0: COEF on vr_tot = -1
test vr_tot = -1
* Joint H0: COEF = -1 AND CONST = 0 (added to the previous test)
test _cons = 0, accumulate
more

*  -----------------------------------------------------------
*  2.9 CALCULATION OF PREDICTED VALUES
more
* Re-estimate on the full sample so that predictions use this model
regress ur_tot vr_tot
* Store the predicted values of the left-hand side variable in ur_head
predict ur_head
more
* Plot actual versus fitted values
graph ur_tot ur_head
more

*  -----------------------------------------------------------
*  2.10 WORKING WITH TIME SERIES AND PANEL DATA, LAG/DIFFERENCE OPERATORS
more

* Sort the data by the time variable, then by the cross-section variable
sort date dcode
* Declare the time variable
tis date
* Declare the cross-section (panel) variable
iis dcode
* Declare the data to be a panel: units "dcode" observed over "date"
tsset dcode date
more

* Note: if you have time series only, type:
* tsset date
more

* working with time series or panel data
more

* LAG OPERATORS
* OLS regression of UR on UR(-1)
reg ur_tot L.ur_tot
* OLS regression of UR on UR(-1) and UR(-2)
reg ur_tot L.ur_tot L2.ur_tot
more

* DIFFERENCE OPERATORS
* OLS regression of UR on its first difference {UR-UR(-1)}
reg ur_tot D.ur_tot
* OLS regression of UR on its second difference
* {UR - 2*UR(-1) + UR(-2)}, i.e. the difference of the difference.
* For the two-period change {UR-UR(-2)} use S2.ur_tot instead.
reg ur_tot D2.ur_tot
more

*  -----------------------------------------------------------
*  3) WORKING WITH SCALARS AND ARRAYS OF DATA
*  -----------------------------------------------------------
*  3.1 SCALAR CREATION AND OPERATIONS
more

* Define the scalars sc1 and sc2
scalar define sc1 = 10*2
scalar define sc2 = 0.2
more
* List the scalars sc1 and sc2
scalar list sc1 sc2
more
* Operation with scalars: division
scalar define sc3 = sc1/sc2
more
* Operation with scalars: raising to the power 2
scalar define sc4 = sc3^2
more
* List all scalars
scalar list
more
* Remove one named scalar from memory ...
scalar drop sc1
* ... and then every remaining scalar
scalar drop _all
more

* -----------------------------------------------------------
* 3.2. MATRIX / VECTOR CREATION AND OPERATIONS
more

* Set the maximum matrix size to 800
set matsize 800
more

* ID is the 4x4 identity matrix
mat ID = I(4)
more
* Two equivalent ways to display matrix ID
mat list ID
mat l ID
more

* Convert the two variables into the columns of matrix A
* (one row per observation)
mkmat ur_tot vr_tot, mat(A)
more

* A has one row per observation, so its listing is kept out of the log:
* logging is suspended, A is listed on screen, then logging resumes
log off
mat list A
log on
more

* B = transposed A times A (a 2x2 cross-product matrix)
mat B = A'*A
mat list B
more

* Example of matrix algebra
mat C = B*B - B
mat list C
more

* Convert matrix C into new variables, one per column: C1 and C2
svmat C
more
* List both new variables, C1 and C2, for the first 5 observations
list C1 C2 if _n<=5
more

/*  DEFINING A VECTOR V AND CALCULATING THE SUM OF SQUARES OF ITS ELEMENTS */
mkmat ur_tot, mat(V)
* V'V is a 1x1 matrix holding the sum of squared elements of V
mat V2 = V'*V
mat list V2
more

* X is a 3x4 matrix with every element equal to zero
mat X = J(3,4,0)
mat list X
more

* Replace the value of element [1,3] of X
mat X[1,3] = 100
mat list X
more

* Name the 4 columns and the 3 rows of X; one name is supplied for
* every column and every row so that no row is left with a reused name
mat colnames X = a b c d
mat rownames X = R1 R2 R3
mat l X
more

* -----------------------------------------------------------
* 3.3 MATRIX FUNCTIONS RETURNING SCALARS 

* Number of rows of X -> scalar rn
scalar define rn = rowsof(X)
* Number of columns of X -> scalar cn
scalar define cn = colsof(X)
* Determinant of ID -> scalar dt
scalar define dt = det(ID)
* Trace of ID -> scalar tr
scalar define tr = trace(ID)
* Element [1,3] of X -> scalar sc13
scalar define sc13 = el("X",1,3)
* Display all scalars (equivalent to "scalar list")
scalar dir
more

* -----------------------------------------------------------
* 3.4 RETRIEVING VALUES INTO SCALARS AND MATRICES FROM OUTPUTS
*
* FORMING SCALARS
more


summarize ur_tot
* Display all values stored by the command above
disp_res
more
* Copy individual stored results of "summarize" into scalars:
* number of observations, mean, minimum and maximum of ur_tot
scalar define c_nobs = _result(1)
scalar define c_mean = _result(3)
scalar define c_min = _result(5)
scalar define c_max = _result(6)
* Display all scalars
scalar list
more

* Another way to display and access all stored output
return list
more

regress ur_tot vr_tot
more
* Display the stored regression results
disp_res
more
* Copy _result(1) into a scalar
scalar define c1 = _result(1)
more
scalar list c1
more

regress ur_tot vr_tot
more
* Another way to display and access all stored output
estimates list
more

* FORMING MATRICES
more

regress ur_tot vr_tot
more
* Matrix of estimated coefficients
matrix list e(b)
more
* Variance-covariance matrix of the estimates
matrix list e(V)
more

* Another way to display and access all stored output
estimates list
more

*  -----------------------------------------------------------
*  4) EXAMPLES OF REGRESSIONS
*  -----------------------------------------------------------
*  4.1 MEASUREMENT ERROR BIAS: IMPACT OF  ERROR IN RIGHT-HAND SIDE VARIABLES
more

* Benchmark regression and statistics on the correctly measured regressor
regress ur_tot vr_tot
summarize ur_tot vr_tot
more
* Contaminate the regressor with the N(0,1) error term "en"
generate vr_tot2 = vr_tot + en
more
* Same regression with the mismeasured regressor
regress ur_tot vr_tot2
more

*  -----------------------------------------------------------
*  4.2 EFFECT OF MULTICOLLINEARITY
more

* A) WEAK MULTICOLLINEARITY

* Correlation between the regressor and its noisy copy
correlate vr_tot vr_tot2
more
* Regression with one regressor, then with both
regress ur_tot vr_tot
regress ur_tot vr_tot vr_tot2
more

* B) STRONG MULTICOLLINEARITY

* vr_tot3 is vr_tot scaled by (1 + 0.1*en), where en = N(0,1)
generate vr_tot3 = vr_tot*(1+0.1*en)
correlate vr_tot vr_tot3
more
* Regression with one regressor, then with both
regress ur_tot vr_tot
regress ur_tot vr_tot vr_tot3
more
* Regression on the scaled regressor alone
regress ur_tot vr_tot3
more

*  -----------------------------------------------------------
*  4.3 FIXED / RANDOM EFFECT MODEL
more

* Pooled OLS for comparison
regress ur_tot vr_tot
more

* Fixed-effects estimator, then random-effects estimator
xtreg ur_tot vr_tot, fe
xtreg ur_tot vr_tot, re
more

* Close the log
log close

