def density_bandwidth(x, method="KernSmooth", resolution=401):
    """**Bandwidth Selection for Density Estimation**

    Bandwidth selector for :func:`.density` estimation. See the ``bw_method`` argument in
    :func:`.scipy.stats.gaussian_kde`.

    The ``"KernSmooth"`` method is adapted from the ``dpik()`` function of the *KernSmooth* R
    package. In this case, it estimates the optimal AMISE bandwidth using the direct plug-in
    method with 2 levels for the Parzen-Rosenblatt estimator with a Gaussian kernel.

    Parameters
    ----------
    x : Union[list, np.array, pd.Series]
        A vector of values.
    method : Union[str, float]
        The bandwidth selection method. Can be a number (used directly as the bandwidth factor;
        the larger the value, the smoother the estimation), ``"scott"`` or ``"silverman"``
        (see the ``bw_method`` argument in :func:`.scipy.stats.gaussian_kde`), or
        ``"KernSmooth"`` (default).
    resolution : int
        Only used when ``method="KernSmooth"``. The number of equally-spaced points over which
        binning is performed to obtain the kernel functional approximation (see the ``gridsize``
        argument in ``KernSmooth::dpik()``).

    Returns
    -------
    float
        Bandwidth value.

    See Also
    --------
    density

    Examples
    --------
    .. ipython:: python

      import numpy as np
      import neurokit2 as nk

      x = np.random.normal(0, 1, size=100)
      bw = nk.density_bandwidth(x)
      bw

      nk.density_bandwidth(x, method="scott")
      nk.density_bandwidth(x, method=1)

      @savefig p_density_bandwidth.png scale=100%
      x, y = nk.density(x, bandwidth=bw, show=True)
      @suppress
      plt.close()

    References
    ----------
    * Wand, M. P., & Jones, M. C. (1995). Kernel Smoothing. Chapman & Hall.

    """
    if isinstance(method, str):
        method = method.lower()

    if isinstance(method, (float, int)) or method != "kernsmooth":
        return scipy.stats.gaussian_kde(x, bw_method=method).factor

    # Scale estimate: the minimum of the standard deviation and the standardized IQR
    n = len(x)
    stdev = np.nanstd(x, ddof=1)
    iqr = np.diff(np.percentile(x, [25, 75]))[0] / 1.349
    scalest = min(stdev, iqr)

    # Standardize the data and bin it on an equally-spaced grid
    data_scaled = (x - np.nanmean(x)) / scalest
    min_scaled = np.nanmin(data_scaled)
    max_scaled = np.nanmax(data_scaled)

    gcounts = _density_linearbinning(
        x=data_scaled,
        gpoints=np.linspace(min_scaled, max_scaled, resolution),
        truncate=True,
    )

    # Direct plug-in with 2 levels: estimate the density functionals psi_6, then psi_4
    alpha = (2 * np.sqrt(2) ** 9 / (7 * n)) ** (1 / 9)
    psi6hat = _density_bkfe(gcounts, 6, alpha, min_scaled, max_scaled)

    alpha = (-3 * np.sqrt(2 / np.pi) / (psi6hat * n)) ** (1 / 7)
    psi4hat = _density_bkfe(gcounts, 4, alpha, min_scaled, max_scaled)

    # AMISE-optimal bandwidth for the Gaussian kernel, rescaled to the original data scale
    delta_0 = 1 / ((4 * np.pi) ** (1 / 10))
    output = scalest * delta_0 * (1 / (psi4hat * n)) ** (1 / 5)

    return output
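
# Illustrative sketch (not part of the package API): per the docstring above, the
# "KernSmooth" value is an absolute bandwidth `h` for the Parzen-Rosenblatt estimator
# with a Gaussian kernel, so it can be plugged into a hand-rolled KDE for checking.
# The helper name `_gaussian_kde_sketch` and the grid choice are illustrative assumptions.
def _gaussian_kde_sketch(x, grid, h):
    """Evaluate a Gaussian-kernel Parzen-Rosenblatt estimate of `x` on `grid`."""
    x = np.asarray(x)
    z = (np.asarray(grid)[:, None] - x[None, :]) / h
    # One Gaussian kernel per observation, summed and normalised by n * h
    return np.exp(-0.5 * z ** 2).sum(axis=1) / (len(x) * h * np.sqrt(2 * np.pi))


# Example usage (sketch):
#   h = density_bandwidth(x, method="KernSmooth")
#   grid = np.linspace(np.nanmin(x), np.nanmax(x), 401)
#   y = _gaussian_kde_sketch(x, grid, h)
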
def _density_linearbinning(x, gpoints, truncate=True):
    """Linear binning. Adapted from the KernSmooth R package."""
    n = len(x)
    M = gpoints.shape[0]
    a = gpoints[0]
    b = gpoints[-1]

    # Initialization of gcounts
    gcounts = np.zeros(M)
    Delta = (b - a) / (M - 1)

    for i in range(n):
        lxi = ((x[i] - a) / Delta) + 1
        li = int(lxi)
        rem = lxi - li

        if (li >= 1) and (li < M):
            gcounts[li - 1] = gcounts[li - 1] + 1 - rem
            gcounts[li] = gcounts[li] + rem
        elif (li < 1) and (truncate is False):
            gcounts[0] = gcounts[0] + 1
        elif (li >= M) and (truncate is False):
            gcounts[M - 1] = gcounts[M - 1] + 1

    return gcounts


def _density_bkfe(gcounts, drv, h, a, b):
    """'bkfe' function adapted from the KernSmooth R package."""
    resol = len(gcounts)

    # Set the sample size and bin width
    n = np.nansum(gcounts)
    delta = (b - a) / (resol - 1)

    # Obtain kernel weights
    tau = drv + 4
    L = min(int(tau * h / delta), resol)
    if L == 0:
        warnings.warn(
            "WARNING: Binning grid too coarse for current (small) bandwidth: "
            "consider increasing 'resolution'"
        )

    lvec = np.arange(L + 1)
    arg = lvec * delta / h

    dnorm = np.exp(-np.square(arg) / 2) / np.sqrt(2 * np.pi)
    kappam = dnorm / h ** (drv + 1)
    hmold0 = 1
    hmold1 = arg
    hmnew = 1
    if drv >= 2:
        for i in np.arange(2, drv + 1):
            hmnew = arg * hmold1 - (i - 1) * hmold0
            hmold0 = hmold1  # Compute mth degree Hermite polynomial
            hmold1 = hmnew  # by recurrence.
        kappam = hmnew * kappam

    # Now combine weights and counts to obtain estimate
    P = 2 ** (int(np.log(resol + L + 1) / np.log(2)) + 1)
    kappam = np.concatenate((kappam, np.zeros(P - 2 * L - 1), kappam[1:][::-1]), axis=0)
    Gcounts = np.concatenate((gcounts, np.zeros(P - resol)), axis=0)
    kappam = np.fft.fft(kappam)
    Gcounts = np.fft.fft(Gcounts)

    gcounter = gcounts * (np.real(np.fft.ifft(kappam * Gcounts)))[0:resol]

    return np.nansum(gcounter) / n ** 2
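
# Illustrative sketch (assumption: run as a script, not part of the package): linear
# binning spreads each observation's unit mass across its two neighbouring grid points,
# in proportion to how close it falls to each. The values and grid below are made up.
if __name__ == "__main__":
    demo_x = np.array([0.25, 1.0, 1.9])
    demo_grid = np.linspace(0.0, 2.0, 5)  # grid spacing Delta = 0.5
    # 0.25 lies halfway between 0.0 and 0.5 -> 0.5 mass to each;
    # 1.0 falls exactly on a grid point -> full mass there;
    # 1.9 splits 0.2 / 0.8 between 1.5 and 2.0.
    print(_density_linearbinning(demo_x, demo_grid))  # [0.5, 0.5, 1.0, 0.2, 0.8]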