   *               _\|/_
                   (o o)
    +----oOO-{_}-OOo----------------------------------------------------------------------------+
    :                                                                                                                        :
    :       Version 1.5                                                                                                :
    :       July 2010                                                                                                :
	:       http://www.medicine.mcgill.ca/epidemiology/Joseph/PBelisle/                :
    :                                                                                                                     :
    +---------------------------------------------------------------------------------------------;


%macro mds2wb(data, xvars, outfile, undef=na, savedim=1, objectname=, format=best12., linesize=100, dimnames=, nvars=, byrow=1);
	%local alphanumericvars AnyMissingIndexValue dim dimname donotprintvalues fmt i indexvar indexvar1 indexvarlen indexvarlens indexvars indexvartype indexvartypes j lcxvar lcxvars lcxvars2 lmax;
	%local m maxnlines mdsSafePrefix multidim nalphanumeric nindexvars nxvars OneObjectSave othervars StructureDim tmp xlens xvar v wbobjectname wbxvar wbxvars;
	%local dsallvalues dsallvaluesandcounts dsAnyMissingIndexValue dscompletenewdata dscontents dslccontents dslinecounts dsnewdata dsnotunique dstmp dsv dsvalues;

	%let lcxvars							= %lowcase(&xvars);
	%let wbxvars							= %sysfunc(translate(&xvars, %str(.), %str(_)));
	%let nxvars							= %mdsntokens(&xvars);
	%let OneObjectSave				= 0;

	%if &byrow %then %let byrow = 1; * make sure ist value is either 0 or 1;

	%if %length(%superq(objectname)) %then %do;
		%let wbobjectname	= %sysfunc(translate(&objectname, %str(.), %str(_)));
		%let OneObjectSave = 1;
	%end;

	%if %length(%superq(nvars)) = 0 %then %let nvars=~;
	%else %let nvars=%sysfunc(strip(&nvars));

	%if &OneObjectSave and &nxvars = 1 %then %let OneObjectSave = 0;

	%if "%upcase(&undef)" eq "NA" %then %do;
		%let undef = .;
	%end;

	%let dscontents = %mdsNewDatasetName(contents);

	proc contents out=&dscontents (keep=name length format varnum type) noprint data=&data; run;

	proc sql noprint;
		select name, length into :lcxvars2 separated by ' ', :xlens separated by ' '
		from &dscontents
		where lowcase(name) in (%mdsdqlist(&lcxvars));
	quit;

	%let dslccontents = %mdsNewDatasetName(lccontents);

	proc sql;
		create table &dslccontents as
		select lowcase(name) as varname, format, varnum, type, length
		from &dscontents
		order by varnum;
	quit;

	proc sql noprint;
		select varname, N(varname), type, length into: indexvars separated by " ", : nindexvars, :indexvartypes separated by " ", :indexvarlens separated by " "
		from &dslccontents
		where varname not in (%mdsdqlist(&lcxvars));
	quit;

	%if &nindexvars = 0 %then %do;
		%put ERROR: No index variable present in mds2wb input data set (&data).;
		proc datasets nolist; delete &dscontents &dslccontents; run;
		%goto skip;
	%end;
	%else %if &nindexvars > 1 or &OneObjectSave %then %let multidim = 1;
	%else %let multidim = 0;

	%if %length(%superq(dimnames)) %then %do;
		%let tmp = %mdsntokens(&dimnames);
		%if &tmp ne &nindexvars %then %do;
			%put ERROR: Length of dimnames (&tmp components: &dimnames) is different from the actual number of index variables (&nindexvars components: &indexvars);
			proc datasets nolist; delete &dscontents &dslccontents; run;
			%goto skip;
		%end;
	%end;

	proc sql noprint;
		select varname, N(varname) into: alphanumericvars separated by ', ', :nalphanumeric
		from &dslccontents
		where type eq 2 and varname in (%mdsdqlist(&lcxvars));
	quit;

	%if &nalphanumeric %then %do;
		%put ERROR: Alphanumeric variables (&alphanumericvars) not allowed in xvars argument.;
		proc datasets nolist; delete &dscontents &dslccontents; run;
		%goto skip;
	%end;

	* check that no index variable has a missing value;

	%let dsAnyMissingIndexValue = %mdsNewDatasetName(anymissingindex);

	proc sql;
		create table &dsAnyMissingIndexValue as
		select
		%do i = 1 %to &nindexvars;
			%if &i gt 1 %then %do;
				or
			%end;
			%let indexvar = %scan(&indexvars, &i);
			missing(&indexvar)
		%end;
		as AnyMissing
		from &data;
	quit;

	proc sql noprint;
		select max(AnyMissing) into :AnyMissingIndexValue
		from &dsAnyMissingIndexValue;
	quit;

	proc datasets nolist; delete &dsAnyMissingIndexValue; run;

	%if &AnyMissingIndexValue %then %do;
		%put ERROR: missing value found in one or more index variable [&indexvars];
		%goto skip;
	%end;

	* check that no index line is repeated in data set;

	%let indexvar1 = %scan(&indexvars, 1);

	%let dslinecounts = %mdsNewDatasetName(linecounts);

	proc sql;
		create table &dslinecounts as
		select N(&indexvar1) as n
		from &data
		group by %mdscommasep(&indexvars);
	quit;

	proc sql noprint;
		select max(n) into: maxnlines
		from &dslinecounts;
	quit;

	proc datasets nolist; delete &dslinecounts; run;

	%mdsSafeVarPrefix(mdsSafePrefix, &data);

	%if &maxnlines > 1 %then %do;
		%let dsnotunique = %mdsNewDatasetName(notunique);

		proc sql;
			create table &dsnotunique as
			select *, N(&indexvar1) as &mdsSafePrefix.n
			from &data
			group by %mdscommasep(&indexvars);
		quit;
		title "Lines below have the same value for index variables (&indexvars),";
		title2 "which is not allowed in mds2wb";
		proc print noobs data=&dsnotunique; var &indexvars &xvars; where &mdsSafePrefix.n > 1;run;
		title;

		%put ERROR: Index variables values repeated (cf. output window).;
		proc datasets nolist; delete &dscontents &dslccontents &dsnotunique; run;
		%goto skip;
	%end;

	data _null_;
	    file &outfile linesize=&linesize;
		%if &OneObjectSave %then %do;
			put "# &objectname";
			%if &byrow %then %do;
				put "# index variables: &indexvars original_variable";
			%end;
			%else %do;
				put "# index variables: original_variable &indexvars";
			%end;
		%end;
		%else %do;
			put "# &wbxvars";
			put "# index variables: &indexvars";
		%end;

		put "#";
	run;

	%if &OneObjectSave %then %do;
		%let lmax	= %mdslmax(&xvars);
		%let dsv	= %mdsNewDatasetName(v);

		data &dsv;
			length varname $ &lmax;
			%do v = 1 %to &nxvars;
				%let xvar = %scan(&xvars, &v);
				v = &v;
				varname = "&xvar";
				output;
			%end;
		run;
		%if &byrow = 0 %then %do;
			data _null_;
				set &dsv;
				file &outfile linesize=&linesize mod;
				if _n_ = 1 then do;
					put "# original_";
					put "# variable";
				end;

				put "#"  @10 (v varname) (5. ": " char&lmax..);
			run;
			proc datasets nolist; delete &dsv; run;
		%end;
	%end;

	%let dsnewdata = %mdsNewDatasetName(newdata);

	proc sql;
		create table &dsnewdata as
		select *, 1 as &mdsSafePrefix.orig
		from &data;
	quit;

	%let dsallvalues	= %mdsNewDatasetName(allvalues);
	%let dstmp			= %mdsNewDatasetName(tmp);
	%let dsvalues		= %mdsNewDatasetName(values);

	%do i = 1 %to &nindexvars;
		%let indexvar			= %scan(&indexvars, &i);
		%let indexvartype	= %scan(&indexvartypes, &i);

		proc sql noprint;
			select varname into :othervars separated by " "
			from &dslccontents
			where varname ne "&indexvar";
		quit;

		proc sql;
			create table &dsvalues as
			select DISTINCT(&indexvar) as &indexvar
			from &data;
		quit;

		proc sort data=&dsvalues; by &indexvar; run;

		data &dsvalues;
			set &dsvalues;
			&mdsSafePrefix.index = _N_;
		run;

		proc sql noprint;
			select N(&indexvar) into: dim
			from &dsvalues;
		quit;

		%let dim=%sysfunc(strip(&dim));

		%if &indexvartype eq 1 %then %do;
			* index variable is numeric;

			proc sql noprint;
				select format into :fmt
				from &dslccontents
				where varname eq "&indexvar";
			quit;

			* If values are the first integers of |N, it is not worth reporting them (unless variable is formatted);

			proc sql noprint;
				select mean(&indexvar-&mdsSafePrefix.index), var(&indexvar-&mdsSafePrefix.index) into :m, :v
				from &dsvalues;
			quit;

			%if &m eq 0 and &v eq 0 and %length(&fmt) eq 0 %then %do;
				data _null_;
					set &dslccontents;
			    	file &outfile linesize=&linesize mod;
					if _n_ = 1 then do;
						put "# &indexvar";
						put "#"  @10  "1-&dim:  1-&dim";
					end;
				run;
			%end;
			%else %do;
				%let fmt = %scan(&fmt 5, 1);

				data _null_;
					set &dsvalues end=eof;
			    	file &outfile linesize=&linesize mod;
					if _n_ = 1 then put "# &indexvar";
					put "#"  @10 (&mdsSafePrefix.index &indexvar) (5. ": " &fmt..);
				run;
			%end;
		%end;
		%else %do;
			* index variable is alphanumeric;
			%let indexvarlen	= %scan(&indexvarlens, &i);

			data _null_;
				set &dsvalues end=eof;
			    file &outfile linesize=&linesize mod;
				if _n_ = 1 then put "# &indexvar";
				put "#"  @10 (&mdsSafePrefix.index &indexvar) (5. ": " char&indexvarlen..);
			run;
		%end;

		proc sql;
			create table &dstmp as
			select %mdscommasep4sql(a, &othervars &mdsSafePrefix.orig), b.&mdsSafePrefix.index as &indexvar
			from &dsnewdata as a, &dsvalues as b
			where a.&indexvar = b.&indexvar;
		quit;
		proc datasets nolist; delete &dsnewdata; change &dstmp=&dsnewdata; run;

		%let StructureDim = &StructureDim &dim;

		%if &i eq 1 %then %do;
			proc sql;
				create table &dsallvalues as
				select &mdsSafePrefix.index as &indexvar
				from &dsvalues;
			quit;
		%end;
		%else %do;
			proc sql;
				create table &dstmp as
				select a.*, b.&mdsSafePrefix.index as &indexvar
				from &dsallvalues as a, &dsvalues as b;
			quit;
			proc datasets nolist; delete &dsallvalues; change &dstmp=&dsallvalues; run;
		%end;
	%end;

	%if &OneObjectSave and &byrow %then %do;
		data _null_;
			set &dsv;
			file &outfile linesize=&linesize mod;

			if _n_ = 1 then do;
				put "# original_";
				put "# variable";
			end;

			put "#"  @10 (v varname) (5. ": " char&lmax..);
		run;
		proc datasets nolist; delete &dsv; run;
	%end;

	%if &OneObjectSave %then %do;
		%if &byrow = 0 %then %let StructureDim = &nxvars &StructureDim;
		%else %let StructureDim = &StructureDim &nxvars;
	%end;

	%let dsallvaluesandcounts = %mdsNewDatasetName(allvaluesandcounts);

	proc sql;
		create table &dsallvaluesandcounts as 
		select *, %mdsDotFormatted(&lcxvars, &xlens)
			0 as &mdsSafePrefix.orig
		from &dsallvalues;
	quit;

	proc datasets nolist; append base=&dsnewdata data=&dsallvaluesandcounts; run;

	%let dscompletenewdata = %mdsNewDatasetName(completenewdata);

	proc sql;
		create table &dscompletenewdata as
		select %mdscommasep(&indexvars),
			%do v = 1 %to &nxvars;
				%let lcxvar = %scan(&lcxvars, &v);
				sum(&lcxvar) as &lcxvar, 
			%end;
			max(&mdsSafePrefix.orig) as &mdsSafePrefix.orig
		from &dsnewdata
		group by %mdscommasep(&indexvars);
	quit;

	proc format;
   		value __na
        	.="NA"
        	other=[&format]
   		 ;
	run;

	data &dscompletenewdata;
		set &dscompletenewdata;
		if &mdsSafePrefix.orig ne 1 then do;
			%do v = 1 %to &nxvars;
				%let lcxvar = %scan(&lcxvars, &v);
				&lcxvar = &undef;
			%end;
		end;
		format &lcxvars __na.;
	run;

	proc sort data=&dscompletenewdata; by &indexvars; run;


	%if &OneObjectSave = 0 or &byrow = 0 or &nxvars = 1 %then %do;
		%do v = 1 %to &nxvars;
			%let lcxvar	= %scan(&lcxvars, &v);
			%let wbxvar	= %scan(&wbxvars, &v, %str( ));

			data _null_;
				set &dscompletenewdata end=eof;
			    file &outfile linesize=&linesize mod;

				if _n_ = 1 then do;
					%if &v = 1 %then %do;
						put;
						put "list(" @;
						%if "&nvars" ne "~" and &nxvars > 1 %then %do;
							put "&nvars = &nxvars, " @;
						%end;

						%if &savedim %then %do;
							%if %length(%superq(dimnames)) %then %do;
								%let dimnames = %sysfunc(translate(&dimnames, %str(.), %str(_)));
								%do j = 1 %to &nindexvars;
									%let dimname = %scan(&dimnames, &j, %str( ));
									%let dim = %scan(&StructureDim, &j + (1-&byrow)*&OneObjectSave);
									%if %length(&dimname) and "&dimname" ne "~" and %length(&dim) %then %do;
										put "&dimname = &dim, " @;
									%end;
								%end;
							%end;
							%else %do;
								%if &OneObjectSave %then %do;
									%let tmp = &wbobjectname;
								%end;
								%else %do;
									%let tmp = &wbxvar;
								%end;

								%if &multidim %then %do;
									put  "&tmp..dim = c(%mdscommasep(&StructureDim)), " @;
								%end;
								%else %do;
									put  "&tmp..len = &StructureDim, " @;
								%end;
							%end;
						%end;
					%end;

					%if &OneObjectSave %then %do;
						%if &v = 1 %then %do;
							put "&wbobjectname = " @;
						%end;
					%end;
					%else %do;
						put "&wbxvar = " @;
					%end;

					%if &OneObjectSave = 0 or &v = 1 %then %do;
						%if &multidim %then %do;
							put "structure(.Data = " @;
						%end;
						put "c(" @;
					%end;
				end;
				
				if eof then do;
					put &lcxvar @;
					%if &OneObjectSave = 0 or &v = &nxvars %then %do;
						put +(-1) ")" @;
						%if &multidim %then %do;
							put ", .Dim = c(%mdscommasep(&StructureDim)))" @;
						%end;
					%end;
					%else %do;
						put  +(-1) @;
					%end;

					%if &v < &nxvars %then %do;
						put "," @;
					%end;
					%else %do;
						put ")";
					%end;
				end;
				else do;
					put &lcxvar +(-1) ', ' @;
				end;
			run;
		%end;
	%end;
	%else %do;
		* Save one object only, by column;
		data _null_;
			set &dscompletenewdata end=eof;
		    file &outfile linesize=&linesize mod;

			if _n_ = 1 then do;
				put;
				put "list(" @;

				%if "&nvars" ne "~" %then %do;
					put "&nvars = &nxvars, " @;
				%end;

				%if &savedim %then %do;
					%if %length(%superq(dimnames))  %then %do;
						%let dimnames = %sysfunc(translate(&dimnames, %str(.), %str(_)));
						%do j = 1 %to &nindexvars;
							%let dimname = %scan(&dimnames, &j, %str( ));
							%let dim = %scan(&StructureDim, &j);
							%if %length(&dimname) and "&dimname" ne "~" and %length(&dim) %then %do;
								put "&dimname = &dim, " @;
							%end;
						%end;
					%end;
					%else  %do;
						put  "&wbobjectname..dim = c(%mdscommasep(&StructureDim)), " @;
					%end;
				%end;

				put "&wbobjectname = structure(.Data = c(" @;
			end;

			if _n_ gt 1 then put +(-1) ", " @;

			%do v = 1 %to &nxvars;
				%let lcxvar = %scan(&lcxvars, &v);
				%if &v gt 1 %then %do;
					put +(-1) ", " @;
				%end;
				put &lcxvar @;
			%end;
				
			if eof then do;
				put +(-1) ")" @;
				put ", .Dim = c(%mdscommasep(&StructureDim))))";
			end;
		run;
	%end;

	proc datasets nolist; delete &dsallvalues &dsallvaluesandcounts &dscompletenewdata &dscontents &dslccontents &dsnewdata &dsvalues; run;
	%skip:
%mend;

%macro mdscommasep(lov);
   %sysfunc(tranwrd(%Qsysfunc(compbl(%sysfunc(strip(&lov)))), %str( ), %str(, )))
%mend;

%macro mdscommasep4sql(datasetindex, lov);
	&datasetindex..%sysfunc(tranwrd(%Qsysfunc(compbl(%sysfunc(strip(&lov)))), %str( ), %str(, &datasetindex..)))
%mend;

%macro mdsDotFormatted(lov, lens);
	%local i l len v;
	%let l = %mdsntokens(&lov);

	%do i = 1 %to &l;
		%let len	= %scan(&lens, &i);
		%let v	= %scan(&lov, &i);
		. as &v length=&len.,
	%end;
%mend;

%macro mdsdqlist( list );
	"%sysfunc(tranwrd(%sysfunc(compbl(&list)),%quote( ),%quote(", ")))"
%mend;

%macro mdslmax(tokens);
	%local ntokens token l res i;
	%let res = 0;
	%let ntokens = %mdsntokens(&tokens);

	%do i = 1 %to %mdsntokens(&tokens);
		%let token = %scan(&tokens, &i);
		%let l = %length(&token);
		%if &l > &res %then %do;
			%let res = &l;
		%end;
	%end;

	%eval(&res)
%mend;

%macro mdsNewDatasetName(proposalname);
	%*Finds the first unused dataset named  *datasetname*, adding a leading underscore and a numeric suffix as large as necessary to make it unique!;
	%local i newdatasetname;
	%let proposalname=%sysfunc(compress(&proposalname));
	%let newdatasetname=_&proposalname;

	%do %while(%sysfunc(exist(&newdatasetname)));
		%let i = %eval(&i+1);
		%let newdatasetname=_&proposalname&i;
	%end;

	&newdatasetname
%mend;

%macro mdsntokens(list);
	%eval(1 + %length(%sysfunc(compbl(&list))) - %length(%sysfunc(compress(&list))))
%mend;

%macro mdsSafeVarPrefix(outprefix, datasets, errorLength=11);
	%local dsContents dsContents0 dsTmp dsUnavailablePrefixes;
	%local d nds nvarnames tmpds tmpprefix ;
	%local i j k;

	%let dsContents		= %mdsNewDatasetName(contents);
	%let dsContents0	= %mdsNewDatasetName(contents0);

	%let nds = %mdsntokens(&datasets);

	%do d = 1 %to &nds;
		%let tmpds = %scan(&datasets, &d, %str( ));

		proc contents data=&tmpds out=&dsContents (keep=name) noprint; run;

		%if &d = 1 %then %do;
			proc datasets nolist; change &dsContents=&dsContents0; run;
		%end;
		%else %do;
			proc datasets nolist; append data=&dsContents base=&dsContents0; delete &dsContents; run;
		%end;
	%end;

	proc sql;
		create table &dsContents as
		select distinct(name)
		from &dsContents0;
	quit;

	proc sql noprint;
		select N(name) into :nvarnames
		from &dsContents;
	quit;

	* find the maximum length for safe prefix;

	%let j = %eval(1 + %sysfunc(floor(%sysevalf(%sysfunc(log(&nvarnames))/%sysfunc(log(27))))));

	%let dsTmp = %mdsNewDatasetName(tmp);

	data &dsTmp;
		%do i = 1 %to &j;
			a&i = 0;
		%end;
		output;
	run;

	data &dsTmp (keep=l varp);
		set &dsTmp;
		length varp $ &j;

		array a{*} a1-a&j;

		do l = 1 to &j;
			kmax = 27**l;
			do k = 1 to kmax;
				varp = "";
				do j = 1 to l;
					a{j} = mod(floor((k-1)/(27**(j-1))), 27) + 1;
					varp = cats(varp, substr("_abcdefghijklmnopqrstuvwxyz", a{j}, 1));
				end;
				output;
			end;
		end;
	run;

	* List prefixes already present in at least one variable name;

	%let dsUnavailablePrefixes = %mdsNewDatasetName(unavailablepref);

	data &dsUnavailablePrefixes (drop=name l);
		set &dsContents;
		length varp $ &j;

		do l = 1 to &j;
			varp = substr(name, 1, l);
			output;
		end;
	run;
	proc sort data=&dsUnavailablePrefixes; by varp; run;

	data &dsUnavailablePrefixes;
		set &dsUnavailablePrefixes;
		by varp;
		if first.varp;
	run;

	proc sort data=&dsTmp; by varp; run;

	data &dsTmp;
		merge &dsTmp (in=in1) &dsUnavailablePrefixes (in=in2);
		by varp;
		if in1 and not in2;
	run;
	proc sort data=&dsTmp; by l varp; run;

	data &dsTmp; 
		set &dsTmp (obs=1);
	run;

	proc sql noprint;
		select l into :lprefix
		from &dsTmp;
	quit;

	data &dsTmp;
		length varp $&lprefix;
		set &dsTmp;
	run;

	proc sql noprint;
		select varp into :&outprefix
		from &dsTmp;
	quit;

	%if &lprefix >= &errorLength %then %do;
		%let lprefix = %sysfunc(compress(&lprefix));
		%put ERROR: safe prefix is of length (&lprefix) greater or equal than that prescribed by errorLength (&errorLength);
	%end;

	proc datasets nolist; delete &dsContents0 &dsContents &dsTmp &dsUnavailablePrefixes; run;
%mend;
