
#include<R.h>
#include<Rmath.h>
#include<R_ext/BLAS.h>
#include<memory.h>

// Comparison function for quick sort in descending order
int cmpfunc(const void * a, const void * b)
{
	int x = 0;
	if(*(double *)a > *(double *)b)
		x = -1;
	if(*(double *)a < *(double *)b)
		x = 1;
	return(x);
}


// sign(tR_X_l - tR_X_k) (ranks are computed in descending order, i.e., small values have large ranks)
double rank_sign(double Xk, double Xl, int dk, int dl)
{
	double temp = 0.0;
	if(Xk > Xl && ((dk==1 && dl==0) || (dk==1 && dl==1)))
		temp = 1.0;
	if(Xk < Xl && ((dk==0 && dl==1) || (dk==1 && dl==1)))
		temp = -1.0;
	return(temp);
}


// random permutation
void rand_perm(int p, int *I)
{
	int i, j;
	int temp;
	for(i = p - 1; i > 0; i--)
	{
		j = (int) (unif_rand()*p) % (i + 1);
		temp = I[i];
		I[i] = I[j];
		I[j] = temp;
	}
}


void calc_trc(int *N, double *X, double *Y, int *dx, int *dy, double *out)
{
	int i,j,n = *N;
	//int n2 = n*(n-1)/2;
	double temp = 0;
	double tau;
	double a2, b2,L,M,C;
	
	a2 = 0;
	b2 = 0;
	for(i=0;i<(n-1);i++)
	{
		for(j=i+1;j<n;j++)
		{
			L = rank_sign(X[i], X[j], dx[i], dx[j]);
			M = rank_sign(Y[i], Y[j], dy[i], dy[j]);
			temp = temp + L*M;
			a2 = a2 + L*L;
			b2 = b2 + M*M;
		}
	}

	// tau
	C = sqrt(a2*b2);
	if(C==0)
		tau=0;
	else
		tau = temp/C;

	*out = tau;
	
}

// Kendall's tau
void k_tau(int *N, double *X, double *Y, double *out)
{
	int i, n=*N;
	int *one_vec;
	one_vec = malloc(n * sizeof(int));
	for (i = 0; i < n; i++)
		one_vec[i] = 1;

	calc_trc(N, X, Y, one_vec, one_vec, out);

	free(one_vec);
}

// Pearson's rho
void rho(int *N, double *X, double *Y, double *out)
{
	int i, n = *N, IntOne = 1;
	double *one_vec;
	double neg_x_mean = 0, neg_y_mean = 0, x_sd = 0, y_sd = 0, cov = 0;
	double *X_c, *Y_c;
	one_vec = malloc(n * sizeof(double));
	X_c = malloc(n * sizeof(double));
	Y_c = malloc(n * sizeof(double));

	for (i = 0; i < n; i++)
		one_vec[i] = 1.0;

	memcpy(X_c, X, n * sizeof(double));
	memcpy(Y_c, Y, n * sizeof(double));

	neg_x_mean = F77_NAME(ddot)(&n, X, &IntOne, one_vec, &IntOne) / (double)(-n);
	neg_y_mean = F77_NAME(ddot)(&n, Y, &IntOne, one_vec, &IntOne) / (double)(-n);

	F77_NAME(daxpy)(&n, &neg_x_mean, one_vec, &IntOne, X_c, &IntOne);
	F77_NAME(daxpy)(&n, &neg_y_mean, one_vec, &IntOne, Y_c, &IntOne);

	x_sd = sqrt(F77_NAME(ddot)(&n, X_c, &IntOne, X_c, &IntOne) / (double)(n - 1));
	y_sd = sqrt(F77_NAME(ddot)(&n, Y_c, &IntOne, Y_c, &IntOne) / (double)(n - 1));
	cov = F77_NAME(ddot)(&n, X_c, &IntOne, Y_c, &IntOne) / (double)(n - 1);

	*out = cov / (x_sd*y_sd);

	free(one_vec);
	free(X_c);
	free(Y_c);
}


void km_trc(int *N, double *X, double *Y, int *dx, int *dy, double *out)
{
	int i,j,n = *N;
	//int n2 = n*(n-1)/2;
	double km_tau = 0;
	double L,M;
	
	for(i=0;i<(n-1);i++)
	{
		for(j=i+1;j<n;j++)
		{
			L = rank_sign(X[i], X[j], dx[i], dx[j]);
			M = rank_sign(Y[i], Y[j], dy[i], dy[j]);
			km_tau = km_tau + L*M;
		}
	}

	// k_m * tau(m)
	*out = km_tau;
}

//TRC tau with a given m0 value
void trc_tau(int *N, double *X, double *Y, int *M0, double *tau_est)
{
	int j;
	int n = *N, m = *M0;
	int *dx, *dy;
	double tau;
	double thr1=0, thr2=0;
	double *Sx, *Sy, *Xp, *Yp;
	//double *p_tau, *Xp;

	*tau_est = 0;

	Xp = malloc(n*sizeof(double));
	memset(Xp, 0, n*sizeof(double));
	Yp = malloc(n*sizeof(double));
	memset(Yp, 0, n*sizeof(double));

	Sx = malloc(n*sizeof(double));
	memcpy(Sx, X, n*sizeof(double));
	qsort(Sx, n, sizeof(double), cmpfunc);
	
	Sy = malloc(n*sizeof(double));
	memcpy(Sy, Y, n*sizeof(double));
	qsort(Sy, n, sizeof(double), cmpfunc);

	dx = malloc(n*sizeof(int));
	memset(dx, 0, n*sizeof(int));

	dy = malloc(n*sizeof(int));
	memset(dy, 0, n*sizeof(int));

	if (m < n)
	{
		thr1 = Sx[m];
		thr2 = Sy[m];
	}
	if (m == n)
	{
		thr1 = Sx[m-1];
		thr2 = Sy[m-1];
	}

	memcpy(Xp, X, n*sizeof(double));
	memcpy(Yp, Y, n*sizeof(double));

	for(j=0;j<n;j++)
	{
		if(Xp[j]>thr1)
			dx[j] = 1;
		else
		{
			dx[j] = 0;
			Xp[j] = thr1;
		}
		if(Yp[j]>thr2)
			dy[j] = 1;
		else
		{
			dy[j] = 0;
			Yp[j] = thr2;
		}
	}
	calc_trc(&n, Xp, Yp, dx, dy, &tau);
	
	*tau_est = tau;

	free(Xp);
	free(Yp);
	free(Sx);
	free(Sy);
	free(dx);
	free(dy);
}

// Estimating null distribution based on the permutation with a given m0 value
void null_perm_m0(int *N, double *X, double *Y, int *NPERM, int *M0, double *perm_tau)
{
	int i, j, k;
	int n = *N, nperm = *NPERM;
	int *indx, *dx, *dx_perm, *dy;
	int m = *M0;
	double tau;
	double thr1=0, thr2=0;
	double *Xp, *Xp_perm, *Yp, *Sx, *Sy;
	//double *p_tau, *Xp;

	//srand((unsigned int) *seed);

	//p_tau = malloc(nperm*sizeof(double));
	memset(perm_tau, 0, nperm*sizeof(double));
	
	Xp = malloc(n * sizeof(double));
	memset(Xp, 0, n * sizeof(double));
	Xp_perm = malloc(n * sizeof(double));
	memset(Xp_perm, 0, n * sizeof(double));
	Yp = malloc(n * sizeof(double));
	memset(Yp, 0, n * sizeof(double));

	Sx = malloc(n * sizeof(double));
	memcpy(Sx, X, n * sizeof(double));
	qsort(Sx, n, sizeof(double), cmpfunc);

	Sy = malloc(n * sizeof(double));
	memcpy(Sy, Y, n * sizeof(double));
	qsort(Sy, n, sizeof(double), cmpfunc);

	dx = malloc(n * sizeof(int));
	memset(dx, 0, n * sizeof(int));
	dx_perm = malloc(n * sizeof(int));
	memset(dx, 0, n * sizeof(int));

	dy = malloc(n * sizeof(int));
	memset(dy, 0, n * sizeof(int));

	if (m < n)
	{
		thr1 = Sx[m];
		thr2 = Sy[m];
	}
	if (m == n)
	{
		thr1 = Sx[m - 1];
		thr2 = Sy[m - 1];
	}
	memcpy(Xp, X, n * sizeof(double));
	memcpy(Yp, Y, n * sizeof(double));

	for (j = 0; j<n; j++)
	{
		if (Xp[j] > thr1)
			dx[j] = 1;
		else
		{
			dx[j] = 0;
			Xp[j] = thr1;
		}
		if (Yp[j] > thr2)
			dy[j] = 1;
		else
		{
			dy[j] = 0;
			Yp[j] = thr2;
		}
	}

	indx = malloc(n*sizeof(int));
	for(i=0;i<n;i++)
		indx[i] = i;

	//Estimation of Null distribution form permuted samples
	for(k=0;k<nperm;k++)
	{
		rand_perm(n, indx);
		// Permuting X
		for (i = 0; i < n; i++)
		{
			Xp_perm[i] = Xp[indx[i]];
			dx_perm[i] = dx[indx[i]];
		}

		calc_trc(&n, Xp_perm, Yp, dx_perm, dy, &tau);
		perm_tau[k] = tau;
    }

	free(Xp);
	free(Yp);
	free(Sx);
	free(Sy);
	free(dx);
	free(dy);
	free(indx);
	free(Xp_perm);
	free(dx_perm);
}


//Calculate km*trc values for m0 values of a given range [st, floor(range_m * n)]
void km_trc_search(int *N, double *X, double *Y, int *st, double *range_m, double *hist_tau)
{
	int i, j;
	int n = *N, m;
	int start= *st;
	int *dx, *dy;
	int miter = (int)floor((double)*range_m * n);
	double tau;
	double thr1=0, thr2=0;
	double *Sx, *Sy, *Xp, *Yp;
	//double *p_tau, *Xp;

	Xp = malloc(n*sizeof(double));
	memset(Xp, 0, n*sizeof(double));
	Yp = malloc(n*sizeof(double));
	memset(Yp, 0, n*sizeof(double));

	Sx = malloc(n*sizeof(double));
	memcpy(Sx, X, n*sizeof(double));
	qsort(Sx, n, sizeof(double), cmpfunc);

	Sy = malloc(n*sizeof(double));
	memcpy(Sy, Y, n*sizeof(double));
	qsort(Sy, n, sizeof(double), cmpfunc);

	dx = malloc(n*sizeof(int));
	memset(dx, 0, n*sizeof(int));

	dy = malloc(n*sizeof(int));
	memset(dy, 0, n*sizeof(int));

	for(i=start;i<miter+1;i++)
	{
		m = i;
		if (m < n)
		{
			thr1 = Sx[m];
			thr2 = Sy[m];
		}
		if (m == n)
		{
			thr1 = Sx[m - 1];
			thr2 = Sy[m - 1];
		}
		memcpy(Xp, X, n*sizeof(double));
		memcpy(Yp, Y, n*sizeof(double));

		for(j=0;j<n;j++)
		{
			if(Xp[j]>thr1)
				dx[j] = 1;
			else
			{
				dx[j] = 0;
				Xp[j] = thr1;
			}
			if(Yp[j]>thr2)
				dy[j] = 1;
			else
			{
				dy[j] = 0;
				Yp[j] = thr2;
			}
		}
		km_trc(&n, Xp, Yp, dx, dy, &tau);
		hist_tau[i-1] = tau;
	
	}
	
	free(Xp);
	free(Yp);
	free(Sx);
	free(Sy);
	free(dx);
	free(dy);
}

// Estimating null distribution of tau(m_hat) based on the permutation with given m0 values in [3,floor(range_m*n)]
void null_perm(int *N, double *X, double *Y, int *NPERM, int *st, double *range_m, double *perm_km_tau,double *perm_tau, double *perm_ktau, double *perm_rho)
{
	int i, j, k;
	int n = *N, nperm = *NPERM;
	int *indx, *dx, *dy;
	int start = *st-1;
	int miter = (int) floor((double)*range_m * n);
	double tau;
	double thr1=0, thr2=0;
	double *Xp, *Tx,*Ty, *Sx, *Sy;
	//double *p_tau, *Xp;

	//srand((unsigned int)*seed);


	//p_tau = malloc(nperm*sizeof(double));
	memset(perm_km_tau, 0, miter*nperm*sizeof(double));
	memset(perm_tau, 0, miter*nperm*sizeof(double));

	Xp = malloc(n*sizeof(double));
	memset(Xp, 0, n*sizeof(double));

	Tx = malloc(n*sizeof(double));
	memset(Tx, 0, n*sizeof(double));

	Ty = malloc(n*sizeof(double));
	memset(Ty, 0, n*sizeof(double));

	Sx = malloc(n*sizeof(double));
	memcpy(Sx, X, n*sizeof(double));
	qsort(Sx, n, sizeof(double), cmpfunc);

	Sy = malloc(n*sizeof(double));
	memcpy(Sy, Y, n*sizeof(double));
	qsort(Sy, n, sizeof(double), cmpfunc);

	dx = malloc(n*sizeof(int));
	memset(dx, 0, n*sizeof(int));

	dy = malloc(n*sizeof(int));
	memset(dy, 0, n*sizeof(int));

	indx = malloc(n*sizeof(int));
	for(i=0;i<n;i++)
		indx[i] = i;

	//Estimation of Null distribution form permuted samples
	for(k=0;k<nperm;k++)
	{
		rand_perm(n, indx);
		// Permuting X
		for(i=0;i<n;i++)
			Xp[i] = X[indx[i]];

		// Kendall's tau
		k_tau(N, Xp, Y,&perm_ktau[k]);

		// Pearson's rho
		rho(N, Xp, Y, &perm_rho[k]);

		for(i=start;i<miter;i++)
		{
			memcpy(Tx, Xp, n*sizeof(double));
			memcpy(Ty, Y, n*sizeof(double));

			if (i < n)
			{
				thr1 = Sx[i];
				thr2 = Sy[i];
			}
			if (i == n)
			{
				thr1 = Sx[i - 1];
				thr2 = Sy[i - 1];
			}
			for(j=0;j<n;j++)
			{
				if(Tx[j]>thr1)
					dx[j] = 1;
				else
				{
					dx[j] = 0;
					Tx[j] = thr1;
				}
				if(Ty[j]>thr2)
					dy[j] = 1;
				else
				{
					dy[j] = 0;
					Ty[j] = thr2;
				}
			}
			km_trc(&n, Tx, Ty, dx, dy, &tau);
			perm_km_tau[i*nperm+k]=tau;
			tau=0;
			calc_trc(&n, Tx, Ty, dx, dy, &tau);
			perm_tau[i*nperm+k]=tau;
		}
	}

	free(Xp);
	free(Tx);
	free(Ty);
	free(Sx);
	free(Sy);
	free(dx);
	free(dy);
	free(indx);
}
