/* ====================================================================== */
/* Exam script: analysis of the body-temperature (thermometry) data set.  */
/* Variables read from the CSV:                                            */
/*   tempcorps - body temperature (numeric)                                */
/*   sexe      - sex, read as a character variable                         */
/*   freqcard  - heart rate (numeric)                                      */
/* ====================================================================== */

/* ------------------------------ EXERCISE 1 ---------------------------- */

/* question 1: import the CSV; FIRSTOBS=2 skips the header row. */
data tempnorm;
    infile 'C:\Users\Gabriela Ciuperca\M2PRO\logiciel_SAS\exam_an2022\thermometry.csv' DLM=',' FIRSTOBS=2;
    input tempcorps sexe $ freqcard;
run;

/* question 2: normality tests and histogram (with fitted normal curve) */
/* of body temperature.                                                  */
proc univariate data=tempnorm normal;
    var tempcorps;
    histogram / normal VSCALE=count VAXISLABEL="nombre";
    TITLE "Histogramme des données de température corporelle";
run;
/* Clear the title so it does not leak onto the following, unrelated output. */
title;
/* Author's conclusion: normal. */

/* question 3: one-sample t-test of H0: mean = 98.6. */
PROC TTEST DATA=work.tempnorm ALPHA=.05 H0=98.6;
    VAR tempcorps;
RUN;
/* The null hypothesis is rejected (p-value < 0.0001). */

/* Same test with proc univariate: */
proc univariate mu0=98.6 data=work.tempnorm;
    var tempcorps;
run;

/* question 4 */
proc sort DATA=work.tempnorm;
    by tempcorps;
run;

proc print data=tempnorm;
run;

/* Two-sample t-test of body temperature between the sexes. This step is   */
/* what produces the Pooled / Satterthwaite results discussed below; the   */
/* original script commented on them without running the test.             */
proc ttest data=work.tempnorm alpha=.05;
    class sexe;
    var tempcorps;
run;
/* According to both t-test methods, Pooled (equal variances) and          */
/* Satterthwaite (unequal variances), the mean body temperature clearly    */
/* differs between the two sexes. Body temperature is lower for men        */
/* (sexe=1).                                                               */

/* question 5: mean and its 95% confidence interval. */
proc means mean clm alpha=.05 data=work.tempnorm;
    var tempcorps;
run;
/* The 95% confidence interval does not contain 98.6. */

/* question 6: store the descriptive statistics of tempcorps in work.stat */
/* (one row) so they can be reused later as macro variables.              */
PROC MEANS DATA=work.tempnorm NOPRINT;
    VAR tempcorps;
    OUTPUT OUT=work.stat (drop=_type_ _freq_)
        MIN=the_min
        MEAN=the_mean
        STD=the_sd
        Q1=the_quart1
        MEDIAN=the_median
        Q3=the_quart3
        MAX=the_max;
RUN;

proc print data=stat;
run;

/* question 7: randomly inject missing values into tempcorps and freqcard. */
/* NOTE(review): rand('Normal', 0.5) has mean 0.5 (unit standard deviation */
/* by default), so "< 0.01" blanks roughly 31% of the values of each       */
/* variable. If about 1% was intended, rand('Uniform') < 0.01 would be the */
/* usual form. Confirm against the exam statement before changing.         */
data work.tempnorm_vm;
    set work.tempnorm;
    if rand('Normal', 0.5) < 0.01 then tempcorps = .;
    if rand('Normal', 0.5) < 0.01 then freqcard = .;
run;

/* BY-group processing below requires the data sorted by sexe. */
proc sort data=work.tempnorm_vm;
    by sexe;
run;

/* Per-sex summary; the output data set keeps only the maximum of          */
/* tempcorps. The variable is named explicitly so that maxx does not       */
/* depend on the column order of the input data set.                       */
proc means data=work.tempnorm_vm;
    by sexe;
    output out=work.stat_vm (drop=_TYPE_ _FREQ_) max(tempcorps)=maxx;
run;

proc print data=stat_vm;
run;

/* question 8: replace missing temperatures with the maximum of the group. */
data work.tempnorm_no_vm;
    merge work.tempnorm_vm work.stat_vm;
    by sexe;
    if tempcorps = . then tempcorps = maxx;
run;

proc print data=tempnorm_no_vm;
run;

/* question 9: quartiles as macro variables, then a 4-level class variable. */
/* CALL SYMPUTX converts the numeric value itself and strips blanks, which  */
/* avoids the implicit BEST12. conversion (and log notes) of CALL SYMPUT.   */
data _null_;
    set work.stat;
    call symputx('p25', the_quart1);
    call symputx('p50', the_median);
    call symputx('p75', the_quart3);
run;

%put &p25;
%put &p50;
%put &p75;

data work.tempnorm;
    set work.tempnorm;
    /* One-character code: a/b/c/d = quartile class, blank = missing. */
    length tempcorps_qual $ 1;
    if tempcorps = . then tempcorps_qual = ' ';
    else if tempcorps <= &p25 then tempcorps_qual = 'a';
    else if tempcorps <= &p50 then tempcorps_qual = 'b';
    else if tempcorps <= &p75 then tempcorps_qual = 'c';
    else tempcorps_qual = 'd';
run;

/* question 10: chi-square test of independence between sex and class. */
proc freq data=work.tempnorm;
    tables sexe*tempcorps_qual / chisq;
run;

/* -------------------------- EXERCISE 2 (Graphics) --------------------- */

/* question 1: box plot of body temperature by sex.                        */
/* PROC BOXPLOT forms its groups from consecutive observations, and        */
/* work.tempnorm is ordered by tempcorps at this point, so the data are    */
/* first sorted by sexe (into a separate data set, leaving work.tempnorm   */
/* untouched for the later steps).                                         */
proc sort data=work.tempnorm out=work.tempnorm_by_sexe;
    by sexe;
run;

PROC BOXPLOT DATA=work.tempnorm_by_sexe;
    PLOT tempcorps * sexe /
        BOXSTYLE=schematic
        BOXCONNECT=mean
        CFRAME=vligb
        CBOXES=dagr
        CBOXFILL=ywh
        NOHLABEL;
    TITLE "Boxplot température du corps";
RUN;

/* question 2: heart rate against body temperature, one symbol per sex.    */
/* PLOT takes vertical*horizontal, so heart rate goes first to match the   */
/* title ("heart rate as a function of body temperature").                 */
SYMBOL1 V=circle C=black I=None;
SYMBOL2 V=star C=red I=None;
TITLE "Tracé de la fréquence cardiaque en fonction de la température corporelle";
PROC GPLOT DATA=work.tempnorm;
    PLOT freqcard*tempcorps=sexe;
RUN;
QUIT;
/* Clear the plot title before the tabular output of Exercise 3. */
title;

/* --------------------------- EXERCISE 3: Macros ----------------------- */

/* question 1: flag observations lying more than 3 standard deviations    */
/* away from the mean, using the statistics stored in work.stat.          */
data _null_;
    set stat;
    call symputx("moyenne", the_mean);
    call symputx("ecarttype", the_sd);
run;

%put &moyenne;
%put &ecarttype;

data work.outliers;
    set work.tempnorm;
    if tempcorps lt (&moyenne - 3*&ecarttype)
       or tempcorps gt (&moyenne + 3*&ecarttype) then output;
run;

proc print data=work.outliers noobs;
run;
/* Author's observation: a single observation is flagged. */

/* question 2 */
options mprint mlogic;

/* %descriptive: descriptive report for one data set.                     */
/*   tab    - input data set                                               */
/*   v1, v2 - variables summarised with PROC MEANS                         */
/*   v3, v4 - variables tabulated with PROC FREQ                           */
%macro descriptive(tab,v1,v2,v3,v4);
    proc means data=&tab;
        var &v1 &v2;
    run;

    proc freq data=&tab;
        tables &v3 &v4;
    run;
%mend;

%descriptive(tempnorm,tempcorps,freqcard,sexe,tempcorps_qual);

/* question 3 */

/* %traitement_vm: to be called INSIDE a data step. Replaces a missing    */
/* value of &variable with the last non-missing value seen so far; values */
/* that are missing before any non-missing value stay missing.            */
/* The macro only generates data-step statements; the calling data step   */
/* supplies its own RUN statement.                                         */
%macro traitement_vm(variable);
    retain _var .;                       /* last non-missing value so far */
    if not missing(&variable) then _var = &variable;
    else &variable = _var;
    drop _var;                           /* helper is not written out     */
%mend;

data tempnorm_no_vm;
    set tempnorm_vm;
    %traitement_vm(tempcorps)
run;