   *               _\|/_
                   (o o)
    +----oOO-{_}-OOo----------------------------------------------------------------------------+
    :                                                                                                                          :
    :                             Version 1.3.1                                                                      :
    :                             March 2013                                                                        :
    :                                                                                                                      :
    +---------------------------------------------------------------------------------------------;


%macro AickinAlpha(data, var1, var2, wt=1, values=, agree=&var1=&var2, outdata=, printalphanow=1, epsilon=1e-8, maxniter=5000, printfmt=8.6, title=, pseudocount=1, printtable=1, level=0.95);
	%local converged  i lastchange lcount level100 m ncells niter nnonemptycells  unlistedvalue unlistedvalues uservalue z;
	%local dsagree dscountsTable dscountsX dscountsXComplete dsdatavaluesNotInValues dsfComplete dsout dsuserlast dsuservalues dsvalues dsvaluesX;

	%let dsuserlast = &syslast;

	%let z = probit((1+&level)/2);
	%let level100 = %sysevalf(100*&level);

	%let dsout = %NewDatasetName(tmpout);

	/*
		This macro computes the alpha coefficient described in:

		Maximum Likelihood Estimation of Agreement in the Constant Predictive Probability Model, and Its Relation to Cohen-s Kappa
		Mikel Aickin
		Biometrics 46, 293-302, June 1990.
	*/;

	%let dsvalues = %NewDatasetName(values);

	proc sql;
		create table &dsvalues as
		select distinct(x.__myxvar) as __myxvar
		from 
			(select distinct(&var1) as __myxvar from &data
			outer union corresponding
			select distinct(&var2) as __myxvar from &data) as x;
	quit;


	%if %length(%superq(values)) %then %do;
		* user called this macro with values defined in values=: why not use them!?!?;
		%let nvalues = %eval(1 + %length(%sysfunc(compbl(&values))) - %length(%sysfunc(compress(&values))));
		%let dsuservalues = %NewDatasetName(uservalues);

		data &dsuservalues;
		%do i = 1 %to &nvalues;
			%let uservalue = %scan(&values, &i, %str( ));
			__myxvar = &uservalue;
			output;
		%end;
		run;

		proc sort data=&dsuservalues; by __myxvar; run;
		proc sort data=&dsvalues; by __myxvar; run;

		%let dsdatavaluesNotInValues = %NewDatasetName(datavaluesNotInValues);

		data &dsdatavaluesNotInValues;
			merge &dsvalues (in=in1) &dsuservalues (in=in2);
			by __myxvar;
			if in1 and not in2;
		run;

		proc sql noprint;
			select N(__myxvar) into: unlistedvalue
			from &dsdatavaluesNotInValues;
		quit;

		%if &unlistedvalue %then %do;
			proc sql noprint;
				select __myxvar into: unlistedvalues separated by ', '
				from &dsdatavaluesNotInValues;
			quit;

			%put ERROR: [macro AickinAlpha] values found in data &data for variable &var1 (or &var2) [=&unlistedvalues] not listed in values given through AickinAlpha values= argument;
			proc datasets nolist; delete &dsdatavaluesNotInValues &dsuservalues; run;
			%goto skip;
		%end;

		proc datasets nolist;
			delete &dsvalues &dsdatavaluesNotInValues;
			change &dsuservalues = &dsvalues;
		run;
	%end;

	%let dsvaluesX = %NewDatasetName(valuesX);	

	proc sql;
		create table &dsvaluesX as
		select a.__myxvar as &var1, b.__myxvar as &var2
		from &dsvalues as a, &dsvalues as b
		where not missing(a.__myxvar) and not missing(b.__myxvar);
	quit;

	%let dscountsX = %NewDatasetName(countsX);	

	proc sql;
		create table &dscountsX as
		select &var1, &var2, sum(&wt) as __count
		from &data
		where not missing(&var1) and not missing(&var2)
		group &var1, &var2;
	quit;

	%let dscountsXComplete = %NewDatasetName(countsXComplete);	

	proc sql;
		create table &dscountsXComplete as
		select coalesce(k.&var1, u.&var1) as &var1, coalesce(k.&var2, u.&var2) as &var2, coalesce(k.__count, 0) as __count
		from &dscountsX as k
		full join &dsvaluesX as u
		on k.&var1 eq u.&var1 and k.&var2 eq u.&var2;
	quit;

	data &dscountsXComplete;
		set &dscountsXComplete;
		__agree = &agree;
	run; 

	%if &printtable %then %do;
		%let dsfComplete = %NewDatasetName(fComplete);	

		proc sql noprint;
			select ceil(log10(2+max(__count))) into :lcount from &dscountsXComplete;
		quit;

		proc sql;
			create table &dsfComplete as
			select &var1, &var2, __count, __count/sum(__count) as __p, . as __i
			from &dscountsXComplete;
		quit;

		proc report data=&dsfComplete  nofs style={rules=none cellspacing=0}  nowd headskip headline missing;
			column &var1 &var2,(__i __count __p);
			define &var1 / group;
			define &var2 / group across;
			define __i / display style={foreground=GWH background=GWH cellwidth=1pt font_size=1};
			define __count / analysis sum 'N' f=&lcount..0;
			define __p / analysis sum '%' f=percent8.2 style={font_style=italic foreground=GRB};
		run;

		proc datasets nolist; delete &dsfComplete; run;
	%end;

		* Treat special case where the two variables compared present only one value (how can there be disagreement in that case?!??!);

		proc sql noprint;
			select N(__count), sum(__count>0), sum(__count) into :ncells, :nnonemptycells, :m
			from &dscountsXComplete;
		quit;

		%if &m = 0 %then %do;
			%let converged = 1;
			data &dsout;
				alpha = .;
				sd_alpha = .;
				n = &m;
				niter = .;
				lastchange = .;
				converged = .;
				output;
			run;
			%goto report;
		%end;
		%else %if &ncells = 1 %then %do;
			%let converged = 1;
			data &dsout;
				alpha = 1;
				sd_alpha = .;
				n = &m;
				niter = 0;
				lastchange = .;
				converged = 1;
				output;
			run;
			%goto report;
		%end;

	%let dscountsTable = %NewDatasetName(countsTable);	

	proc transpose data=&dscountsXComplete out=&dscountsTable (drop=&var1 _NAME_) prefix=__z;
		var __count;
		by &var1;
		id &var2;
	run;


	%let dsagree = %NewDatasetName(agree);	

	proc transpose data=&dscountsXComplete out=&dsagree (drop=&var1 _NAME_) prefix=__d;
		var __agree;
		by &var1;
		id &var2;
	run;

		%if &nnonemptycells = 1 %then %do;
			* return alpha = 0|1 depending on the agreement as defined by the rule in agree=;
			proc iml;
				use &dscountsTable;
				read all into k;

				use &dsagree;
				read all into d;

				ssize = sum(k);
				P0 = sum(d # k) / ssize;
				sd_alpha = .;
				niter = 0;
				change = .;
				converged = 1;

				outres = P0 || sd_alpha || ssize || niter || change || converged;
  				create &dsout from outres [colname={"alpha" "sd_alpha" "n" "niter" "lastchange"  "converged"}];
 				append from outres;
			quit;
			proc datasets nolist; delete &dscountsTable &dsagree; run;
			%goto report;
		%end;


	proc iml;
		use &dscountsTable;
		read all into k;

		use &dsagree;
		read all into d;

		m = nrow(k);

		ssize = sum(k);
		k = k + &pseudocount/m/m; * Aickin-s suggestion to ensure computational convergence;
		n = ssize + &pseudocount;
		J = J(m,1,1);

		r = J` * k`;
		c = k` * J;

		A = sum(d # k);
		P0 = A / n;

		* Initial values;
		pr = r/n;
		pc = c/n;
		s = pr * d * pc;
		alpha = (P0 - s) / (1 - s);
		niter = 0;

		do until(change < &epsilon | niter = &maxniter);
			oldalpha = alpha;
			niter = niter + 1;

			pr = r / n / (1 - alpha + alpha/s*(d * pc)`);
			pr(|1|) = 1 - sum(pr(|2:m|));

			pc = c / n / (1 - alpha + alpha/s*(pr * d)`);
			pc(|1|) = 1 - sum(pc(|2:m|));

			s = pr * d * pc;
			alpha = (P0 - s) / (1 - s);

			change = abs(alpha-oldalpha);
		end;

		if change < &epsilon then converged = 1;
		else converged = 0;

		* estimate variance of alpha MLE;

		d2Lda2 = - n * (1-s) / (1-alpha) / ((1-alpha)*s + alpha); * scalar;
		T = (1-alpha) / alpha; * scalar;
		V = alpha / s / ((1-alpha)*s + alpha); * scalar;

		pcdiff = pc(|2:m|) - pc(|1|); * m-1 x 1;
		prdiff = pr(|2:m|) - pr(|1|); * m-1 x 1;

		d2Ldadpri		= - A * pcdiff / P0 / P0; * m-1 x 1;
		d2Ldadpcj		= - A * prdiff / P0 / P0; * m-1 x 1;
		d2Ldpridprj	= diag(- r(|2:m|) / (pr(|2:m|) # pr(|2:m|)))  - r(|1|) / pr(|1|) / pr(|1|) + A * (2*s*T + 1) * V * V * (pcdiff * pcdiff`); * m-1 x m-1;
		d2Ldpcidpcj	= diag(- c(|2:m|) / (pc(|2:m|) # pc(|2:m|)))  - c(|1|) / pc(|1|) / pc(|1|) + A * (2*s*T + 1) * V * V * (prdiff * prdiff`); * m-1 x m-1;
		d2Ldpridpcj	= - A * V * (J(m-1,m-1,1)+I(m-1)) + A * (2*s*T + 1) * V * V * (pcdiff * prdiff`); * m-1 x m-1;

		/*
			The matrix of second derivatives is composed of vectors and matrices
			computed above.  The variance of alpha-s estimator is the top-left component of its inverse.
			The matrix is symetric and constructed as follows:

						alpha				Pr1  Pr2 ... Prq				Pc1  Pc2 ... Pcq

						------------		--------------------    	-------------------
			alpha   	- d2Lda2  -   	-   t(d2Ldadpri)   -    	-  t(d2Ldadpcj)  -
						------------   	--------------------    	-------------------

			Pr1                    		--------------------    	-------------------  
			Pr2                    		-   d2Ldpridprj   -    	-  d2Ldpridpcj   -         
			...                    			-                      -    	-                     -
			Prq                    		--------------------    	-------------------

			Pc1                                           					-------------------
			Pc2                                           					-  d2Ldpcidpcj   -
			...                                           					-                     -
			Pcq                                           					-------------------
		*/;

		var1 = d2Lda2 || d2Ldadpri` || d2Ldadpcj`; * 1 x 2m-1;
		var2 = d2Ldadpri || d2Ldpridprj ||  d2Ldpridpcj; * m-1 x 2m-1;
		var3 = d2Ldadpcj || d2Ldpridpcj` || d2Ldpcidpcj; * m-1 x 2m-1;
		var = var1` || var2` || var3`; * 2m-1 x 2m-1;
		var = INV(var);
		sd_alpha = sqrt(- var(|1,1|));

		outres = alpha || sd_alpha || ssize || niter || change || converged;
  		create &dsout from outres [colname={"alpha" "sd_alpha" "n" "niter" "lastchange"  "converged"}];
 		append from outres;
	quit;

	proc datasets nolist; delete &dsagree &dscountsTable; run;

	proc sql noprint;
		select niter, lastchange, converged into :niter, :lastchange, :converged
		from &dsout;
	quit;

	%report:

	data &dsout;
		set &dsout;
		lcl = alpha - &z * sd_alpha;
		ucl = alpha + &z * sd_alpha;
		if lcl < 0 and not missing(lcl) then lcl = 0;
		if ucl > 1 then ucl = 1;
	run;

	%if &converged eq 0 %then %do;
		%put ERROR: [macro AickinAlpha] algorithm did not converge (last change = &lastchange > epsilon=&epsilon) after &maxniter iterations;
		%goto skip;
	%end;
	%else %if &printalphanow %then %do;		
		proc report data=&dsout  nofs style={rules=none cellspacing=0} nowd headskip headline missing;
			column alpha sd_alpha n lcl ucl;

			define alpha / analysis "Aickin's alpha" f=&printfmt;
			define sd_alpha / analysis "(sd)" f=&printfmt;
			define n / display "n";
			define lcl /display "&level100% Lower Conf Limit" f=&printfmt;
			define ucl / display "&level100% Upper Conf Limit" f=&printfmt;

			title1 '---- Agreement -----------------------------------------------';
			title2 &title;
		run;
	%end;

	%skip:
	proc datasets nolist; delete &dscountsX &dscountsXComplete &dsvalues &dsvaluesX; run;

	%if %length(%superq(outdata)) %then %do;
		proc datasets nolist; change &dsout=&outdata; run;
	%end;
	%else %do;
		proc datasets nolist; delete &dsout; run;
	%end;;

	%let syslast = &dsuserlast;
%mend AickinAlpha;


%macro NewDatasetName(datasetname);
	%*Finds the first unused dataset named  *datasetname*, adding as many underscores as necessary at its beginning to make it unique!;
	%local newdatasetname;
	%let newdatasetname = %sysfunc(compress(&datasetname));

	%do %while(%sysfunc(exist(&newdatasetname)));
		%let newdatasetname=_&newdatasetname;
	%end;

	&newdatasetname
%mend NewDatasetName;
