pGpStt/RunGpStt.m at main · ygraigarw/pGpStt

Cannot retrieve contributors at this time

263 lines (205 sloc) 9.93 KB

	%% Simple code to fit QR, Pss and GP models, all the parameters assumed to be fixed in time
	% Mdl(xi, sgm, rho, psi) with xi=xi_0, sgm=sgm_0, ...
	% Will run as is for toy data to check
	% Need to input data as structure (see occurrences of USER INPUT) below
	%
	% QR = non-stationary Quantile Regression threshold
	% Pss = non-stationary PoiSSon count model for threshold exceedances
	% GP = Generalised Pareto model for size of threshold exceedances
	%
	% This is a simplified version of pGpNonStt

	%% Output for further plotting / investigation etc
	% Output from the analysis is saved in a structure C
	% A typical structure C (when 8 different threshold non-exceedance probabilities are used) is
	%
	%% C
	%
	% Nep: [8×1 double] Non-exceedance probabilities for thresholds
	% nNep: 8 Number of NEPs
	% QR: {[1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct]} QR model details (inc posterior sample)
	% Pss: {[1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct]} Pss model details (inc posterior sample)
	% GP: {[1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct]} GP model details (inc posterior sample)
	% RV: {[1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct] [1×1 struct]} RV details (inc posterior sample)
	% RVCmp: [1×1 struct] RV comparison summary
	% PrmSmm: [1x1 struct] Assessment of slope parameter changes
	%
	%% C.QR{q}, C.Pss{q}, C.GP{q} for q=1,2,..., nNep are structures like
	%
	% Lkl: 'QR'
	% nPrm: 1
	% PrmNms: {1×1 cell}
	% Nep: 0.6000
	% nItr: 10000
	% n2Plt: 5000
	% NgtStr: 0.1000
	% AdpItr: 1000
	% AdpBet: 0.0500
	% PrmStr: [2×1 double]
	% AccRat: [10000×1 double]
	% Prm: [10000×2 double]
	% Nll: [10000×1 double]
	% PrmUse: [1×1 double]
	%
	%% Key output for further plotting etc are
	%
	% C.QR{q}.Prm nItr x 1 values of psi0 from MCMC (in general it is safe to use the last 9000; first 1000 might involve "burn-in")
	% C.Pss{q}.Prm nItr x 1 values of rho0 from MCMC (in general it is safe to use the last 9000; first 1000 might involve "burn-in")
	% C.GP{q}.Prm nItr x 2 values of xi0, sigma0 from MCMC (in general it is safe to use the last 9000; first 1000 might involve "burn-in")
	% C.RV{q}.RV nRls x 1 values of return values generated using C.QR.Prm, C.Pss.Prm and C.GP.Prm
	% C.RVCmp.Qnt nNep x 3 Quantiles (2.5%, 50% and 97.5%) of the return value at "time end", one row for each NEP

	%% Set up
	% Start from a clean state: clear the command window, remove ALL variables from the
	% workspace (anything unsaved is lost) and clear the current figure
	clc; clear; clf;
	% LaTeX-style labels for xi, sigma and psi; not referenced again in the visible part of this script
	% NOTE(review): possibly a leftover from pGpNonStt - confirm before removing
	VrbNms={'$\xi$';'$\sigma$';'$\psi$'};

	%% Simulate a sample of data
	% Toy data for checking the code: for each year a Poisson number of occurrences is drawn,
	% and each occurrence is a Generalised Pareto variate. All parameters are constant in time.
	% Populates X.Prm0, X.nYr, X.XSM0, X.nT, X.Tim and X.Dat.
	if 1; %for testing

		% True parameters P0=[xi0;sgm0;rho0;psi0;]
		% (GP shape; GP scale; Poisson rate per annum; GP location, i.e. threshold)

		%% * USER INPUT * Pick the type of simulated data
		X.Prm0=[-0.3; 2; 20; 2;];

		% Time variable
		% NB A COMMON time value used for observations in the same year
		X.nYr=85;
		tYr=(1:X.nYr)'; % Time in years

		% True parameter values per year: nYr x 4 matrix, one (constant) column per parameter.
		% The outer product replicates the parameter row for every year.
		X.XSM0=ones(X.nYr,1)*X.Prm0';

		% Number of occurrences per annum (nYr x 1), Poisson with rate rho0
		tOcc=poissrnd(X.XSM0(:,3));

		% Generate data from GP
		X.nT=sum(tOcc);       % total number of occurrences over all years
		X.Tim=nan(X.nT,1);    % preallocate occurrence times
		X.Dat=nan(X.nT,1);    % preallocate occurrence values
		k=0;                  % running index over all occurrences
		for iY=1:X.nYr;
			for iO=1:tOcc(iY); % empty loop when a year has no occurrences
				k=k+1;
				X.Tim(k)=tYr(iY)/X.nYr; % year number scaled by the number of years
				X.Dat(k)=gprnd(X.XSM0(iY,1),X.XSM0(iY,2),X.XSM0(iY,4)); % gprnd(shape,scale,location)
			end;
		end;

		X, % See the structure

		% Diagnostic plots: yearly counts, and all occurrences against time in years
		subplot(2,2,1); plot(tYr,tOcc,'ko');
		subplot(2,2,2); plot(X.Tim*X.nYr,X.Dat,'ko');

	end;

	%% *USER INPUT* Read in your data here
	% Disabled by default (if 0). When enabled, this section must populate the following
	% fields of the data structure X:
	if 0;
	%X.nYr ; % 1 x 1 number of years
	%X.nT ; % 1 x 1 number of occurrences
	%X.Tim ; % nT x 1 years on [0,1] (so that floor((X.Tim*X.nYr)+1) gives the year number)
	%X.Dat ; % nT x 1 data
	% load('G:\UoM Climate Change\ssp245.mat');
	% NOTE(review): Field and POT are not defined anywhere in the visible script - presumably
	% they come from the (commented-out) load above or from the workspace; confirm before enabling
	Fld=Field;
	% X.Dat=data(:,3);
	% t1=data(:,1);
	% Column 2 of POT.(Fld) is used as the data, column 1 as the time of each occurrence
	X.Dat=POT.(Fld)(:,2);
	t1=POT.(Fld)(:,1);
	% Consecutive integer years from the first to the last time; not used again in this section
	t2=(floor(t1(1)):floor(t1(end)))';
	% Year index (1 for the year of t1(1)) divided by the number of years spanned by t1(1)..t1(end)
	% NOTE(review): assumes t1 is sorted in ascending order - confirm
	X.Tim = (floor(t1)-floor(t1(1))+1)/(floor(t1(end))-floor(t1(1))+1);
	% X.Tim=(t2-t2(1))/range(t2);
	% X.Tim=1:numel(t1)/numel(t1);
	X.nT=size(t1,1);
	% Number of DISTINCT years present in the data
	% NOTE(review): this differs from the denominator used for X.Tim if some year in the span has no data
	X.nYr = numel(unique(floor(t1)));
	end;

	%% *USER INPUT* Read in your data here - MADAGASCAR ANALYSIS
	% Disabled by default (if 0). Builds the data structure X from Madagascar.mat.
	if 0;
	% NOTE(review): the variables yrs and HsPOTall used below are presumably provided by this file - confirm
	load Madagascar.mat;

	% nT and Tim are assigned here but not referenced again in the visible script
	nT=size(yrs,1);
	Tim=yrs;
	Dat=HsPOTall;

	% Kevin's code to create structure X, adapted from above
	t1=yrs;
	% Consecutive integer years from the first to the last time; not used again in this section
	t2=(floor(t1(1)):floor(t1(end)))';
	% Year index (1 for the year of t1(1)) divided by the number of years spanned by t1(1)..t1(end)
	% NOTE(review): assumes t1 is sorted in ascending order - confirm
	X.Tim = (floor(t1)-floor(t1(1))+1)/(floor(t1(end))-floor(t1(1))+1);
	X.nT=size(t1,1);
	% Number of DISTINCT years present in the data
	X.nYr = numel(unique(floor(t1)));
	X.Dat=Dat;

	% Quick look at the data against scaled time
	plot(X.Tim,X.Dat,'k.');

	end;

	%% *USER INPUT* Specify NEPs to consider
	% C.Nep is a column vector of threshold non-exceedance probabilities; C.nNep is its length.
	if true

		% Alternative choices:
		%C.Nep=(0.6:0.05:0.95)'; % (0.6:0.05:0.9)' is a good range; but maybe you want to use (0.7:0.1:0.9)' to get going
		%C.Nep=(0.7:0.1:0.9)';

		% Build as a row, then transpose to the nNep x 1 column used everywhere else
		C.Nep=[0.9, 0.95:0.01:0.99, 0.995]';
		C.nNep=numel(C.Nep);

	end

	%% Estimate extreme value threshold (linear Quantile Regression)
	% For every NEP: set up the QR model, run the MCMC, keep the posterior mean of the last
	% n2Plt iterations as the threshold (PrmUse), save the diagnostic plot and the chain.
	if true

		for iN=1:C.nNep

			% Assemble the model specification in a local structure (fields in the same
			% order as they are stored in C.QR{iN})
			tMdl=struct();
			tMdl.Lkl='QR';                 % Likelihood
			tMdl.nPrm=1;                   % Number of parameters
			tMdl.PrmNms={'$\psi_0$';};     % Names for parameters
			tMdl.Nep=C.Nep(iN);            % NEP

			tMdl.nItr=10000;               % Number of MCMC iterations - 1e4 minimum when used in anger
			tMdl.n2Plt=5000;               % Number of iterations from end of chain to "believe"

			tMdl.NgtStr=0.1;               % Candidate random walk standard deviation - don't change
			tMdl.AdpItr=1000;              % Number of warm up iterations - don't change
			tMdl.AdpBet=0.05;              % Adaptive MC - don't change

			% Constant starting solution for quantile regression: empirical quantile of the data
			tMdl.PrmStr=[quantile(X.Dat,tMdl.Nep)];

			% Run MCMC algorithm
			tMdl=McmcStt(X,tMdl);

			% Use posterior mean over the trusted tail of the chain for subsequent inference
			tKep=(tMdl.nItr-tMdl.n2Plt+1):tMdl.nItr;
			tMdl.PrmUse=mean(tMdl.Prm(tKep,:))';

			C.QR{iN}=tMdl;

			% Save plot
			tStr=sprintf('Mdl%s-Nep%g',C.QR{iN}.Lkl,C.QR{iN}.Nep);
			pDatStm(tStr);
			pGI(tStr,2);

			% Save whole chain
			tFil=sprintf('MCMC');
			save(tFil,'C');

		end

	end

	%% Estimate rate of threshold exceedance per annum (linear Poisson Process)
	% For every NEP: count the exceedances of the QR threshold in each year, use a Poisson fit
	% to those counts as starting solution, run the MCMC, save the diagnostic plot and the chain.
	% Requires C.QR{iN}.PrmUse from the quantile regression section.
	if 1;

		for iN=1:C.nNep

			C.Pss{iN}.Lkl='Pss'; % Likelihood
			C.Pss{iN}.Nep=C.Nep(iN); % NEP
			C.Pss{iN}.PrmNms={'$\rho_0$';}; % Names for parameters
			C.Pss{iN}.nPrm=1; % Number of parameters

			C.Pss{iN}.nItr=10000; % Number of MCMC iterations - 1e4 minimum when used in anger
			C.Pss{iN}.n2Plt=5000; % Number of iterations from end of chain to "believe"

			C.Pss{iN}.NgtStr=0.1; % Candidate random walk standard deviation - don't change
			C.Pss{iN}.AdpItr=1000; % Number of warm up iterations - don't change
			C.Pss{iN}.AdpBet=0.05; % Adaptive MC - don't change

			% Estimate Poisson count for threshold exceedances
			t1=(X.Dat-ones(X.nT,1)*C.QR{iN}.PrmUse(1))>0; % Threshold exceedances (nT x 1 logical)

			% Zero-based year bin of every observation; does not depend on the year loop, so compute once
			% NOTE(review): observations with X.Tim*X.nYr >= X.nYr (e.g. the final year when X.Tim is
			% built as year/nYr, as in the simulated data) fall in bin nYr and are not counted in any
			% year - confirm the intended time convention
			tBin=floor(X.Tim*X.nYr);

			C.Pss{iN}.Cnt=zeros(X.nYr,1);    % exceedance count per year
			C.Pss{iN}.CntTim=zeros(X.nYr,1); % scaled time attached to each count
			for iY=1:X.nYr;
				t2=tBin==(iY-1); % Particular year (tBin is integer valued, so this is iY-1 <= tBin < iY)
				C.Pss{iN}.Cnt(iY,:)=sum(t1(t2));
				C.Pss{iN}.CntTim(iY,:)=(iY-0.5)/X.nYr; % Take middle of year
			end;

			% Constant starting solution from Poisson fit
			C.Pss{iN}.PrmStr=[poissfit(C.Pss{iN}.Cnt)];

			C.Pss{iN}=McmcStt(X,C.Pss{iN}); % Run MCMC algorithm

			tStr=sprintf('Mdl%s-Nep%g',C.Pss{iN}.Lkl,C.Pss{iN}.Nep); pDatStm(tStr); pGI(tStr,2); % Save plot
			tFil=sprintf('MCMC'); save(tFil,'C'); % Save whole chain

		end;

	end;

	%% Estimate size of threshold exceedance per annum (linear Generalised Pareto)
	% For every NEP: isolate the exceedances of the QR threshold, use a GP fit to them as
	% starting solution, run the MCMC, save the diagnostic plot and the chain.
	if true

		for iN=1:C.nNep

			C.GP{iN}.Lkl='GP';                           % Likelihood
			C.GP{iN}.Nep=C.Nep(iN);                      % NEP
			C.GP{iN}.PrmNms={'$\xi_0$';'$\sigma_0$';};   % Names for parameters
			C.GP{iN}.nPrm=2;                             % Number of parameters

			C.GP{iN}.nItr=10000;    % Number of MCMC iterations - 1e4 minimum when used in anger
			C.GP{iN}.n2Plt=5000;    % Number of iterations from end of chain to "believe"

			C.GP{iN}.NgtStr=0.1;    % Candidate random walk standard deviation - don't change
			C.GP{iN}.AdpItr=1000;   % Number of warm up iterations - don't change
			C.GP{iN}.AdpBet=0.05;   % Adaptive MC - don't change

			% Isolate threshold exceedances and times of occurrence
			t1=X.Dat-(ones(X.nT,1)*C.QR{iN}.PrmUse(1)); % Data minus threshold
			tIsExc=t1>0;                                % Observations above the threshold
			C.GP{iN}.Exc=t1(tIsExc);
			C.GP{iN}.ExcTim=X.Tim(tIsExc);

			% Constant starting solution from GP fit; a shape estimate outside [-0.5,0.5]
			% is pulled back to -0.4 or 0.4 respectively
			tFit=gpfit(C.GP{iN}.Exc);
			tXi=tFit(1);
			if tXi<-0.5
				tXi=-0.4;
			elseif tXi>0.5
				tXi=0.4;
			end
			C.GP{iN}.PrmStr=[tXi;tFit(2)];

			% Run MCMC algorithm
			C.GP{iN}=McmcStt(X,C.GP{iN});

			% Save plot
			tStr=sprintf('Mdl%s-Nep%g',C.GP{iN}.Lkl,C.GP{iN}.Nep);
			pDatStm(tStr);
			pGI(tStr,2);

			% Save whole chain
			tFil=sprintf('MCMC');
			save(tFil,'C');

		end

	end

	%% Plot tails per threshold, RV with threshold, and parameter estimates nicely
	% Both plotting routines take C and return it, so the updated structure is carried forward.
	if true

		% Settings for the return value calculation
		RtrPrd=100;   % return period to use in years
		nRls=1000;    % number of realisations of tails to generate

		% Plot of tails per threshold, and RV quantiles with threshold
		C=GpPltTal(C,X,RtrPrd,nRls);

		% Nice plot of QR-Pss-GP parameters for all thresholds
		C=GpPltPrm(C);

	end

	%% Update output file
	% Write the final structure C to the MAT-file named by tFil
	if true

		% Save whole chain
		tFil=sprintf('MCMC');
		save(tFil,'C');

	end