#!/usr/bin/perl -w 

$step=15;
$bin=30;

$zerolimit = 300;

use Statistics::Test::WilcoxonRankSum;
 
my $wilcox_test = Statistics::Test::WilcoxonRankSum->new();
 
#my @dataset_1 = (4.6, 4.7, 4.9, 5.1, 5.2, 5.5, 5.8, 6.1, 6.5, 6.5, 7.2);
#my @dataset_2 = (5.2, 5.3, 5.4, 5.6, 6.2, 6.3, 6.8, 7.7, 8.0, 8.1);
#$wilcox_test->load_data(\@dataset_1, \@dataset_2);
#my $prob = $wilcox_test->probability();
#my $pf = sprintf '%f', $prob; # prints 0.091022
#print "$pf\n\n\n$prob\n\n\n";
#exit;

#print $wilcox_test->probability_status();

#wt <- "/disk_aug18_a/jan2019/processed_data100/smit_files/hs1A_50/"
#wtpb <- "/disk_aug18_a/jan2019/processed_data100/smit_files/hs1B_51/"
#y2h2 <- "/disk_aug18_a/jan2019/processed_data100/smit_files/hs1C_52/"
#y2h2pb <- "/disk_aug18_a/jan2019/processed_data100/smit_files/hs1D_53/"


$pre = "/disk_aug18_a/jan2019/processed_data100/smit_files";

@dirs = qw(
hs1A_50
hs1B_51
hs1C_52
hs1D_53
);

@info = qw(wt wtpb y2h2 y2h2pb);

#hs1A compared to hs1B (0 1)
#hs1A compared to hs1C (0 2) 
#hs1B compared to hs1D (1 3)
#hs1C compared to hs1D (2 3)

print "gene info0_info1 z01 prob01 log_prob01 diff01 info0_info2 z02 prob02 log_prob02 diff02 info1_info3  z13 prob13 log_prob13 diff13 info2_info3 z23 prob23 log_prob23 diff23\n";


foreach $d (@dirs){
system("ls $pre/$d/*splicingValues.txt > tlist");
open(IF,"tlist");
while($a=<IF>){
chomp($a);
@cc = split(/\//,$a);
$gene = $cc[6];
$ghash{$gene}++;
#print "filling $d\n$gene\n";
open(IF2,$a);
$aa = <IF2>;
while($aa=<IF2>){
chomp($aa);
@bb = split(/\t/,$aa);
$pos = $bb[0]; $val = $bb[1]; $splraw = $bb[2] ; $splnorm = $bb[3];
#relPos	readSum	splicingValueRaw	splicingValueNorm
#-100	0	NA	0
#-99	0	NA	0
$splnorm =~ s/NA/0/g;
$poshash{$d}{$gene}  .= "$pos,";
$valhash{$d}{$gene}  .= "$splnorm,";
#print "$d\n";print "$a\n";exit;
}
close(IF2);
}
close(IF);
}

foreach $gene (sort keys %ghash){
$d0 = $dirs[0];$d1 = $dirs[1];$d2 = $dirs[2];$d3 = $dirs[3];
$nzero0=0; $nzero1=0; $nzero2=0; $nzero3=0;
$nfail0=0; $nfail1=0; $nfail2=0; $nfail3=0;
$prob01 = 'NA';$prob02 = 'NA';$prob13 = 'NA';$prob23 = 'NA';
$z01 = 'z:NA';$z02 = 'z:NA';$z13 = 'z:NA';$z23 = 'z:NA';

@fixed_0 = ();@fixed_1 = ();@fixed_2 = ();@fixed_3 = ();
@binned_0 = ();@binned_1 = ();@binned_2 = ();@binned_3 = ();

if(exists $poshash{$d0}{$gene}){
@dataset_0 = split(/,/,$valhash{$d0}{$gene});
for($i=0;$i<400;$i++){
if(exists $dataset_0[$i]){
$fixed_0[$i] = $dataset_0[$i];
if($fixed_0[$i]==0){$nzero0++;}
}
else{$nfail0++;}
}
}

if(exists $poshash{$d1}{$gene}){
@dataset_1 = split(/,/,$valhash{$d1}{$gene});
for($i=0;$i<400;$i++){
if(exists $dataset_1[$i]){
$fixed_1[$i] = $dataset_1[$i];
if($fixed_1[$i]==0){$nzero1++;}
}
else{$nfail1++;}
}
}

if(exists $poshash{$d2}{$gene}){
@dataset_2 = split(/,/,$valhash{$d2}{$gene});
for($i=0;$i<400;$i++){
if(exists $dataset_2[$i]){
$fixed_2[$i] = $dataset_2[$i];
if($fixed_2[$i]==0){$nzero2++;}
}
else{$nfail2++;}
}
}

if(exists $poshash{$d3}{$gene}){
@dataset_3 = split(/,/,$valhash{$d3}{$gene});
for($i=0;$i<400;$i++){
if(exists $dataset_3[$i]){
$fixed_3[$i] = $dataset_3[$i];
if($fixed_3[$i]==0){$nzero3++;}
}
else{$nfail3++;}
}
}

#plot
#bin




if($nfail0==0 && $nfail1==0 && $nzero0<$zerolimit && $nzero1<$zerolimit){
#0 1
$wilcox_test->load_data(\@fixed_0, \@fixed_1);
$prob01 = $wilcox_test->probability();
@ooo = split(/,/, $wilcox_test->probability_status()) ;
$z01 = $ooo[3];
$z01 =~ s/ //g;
}


if($nfail0==0 && $nfail2==0 && $nzero0<$zerolimit && $nzero2<$zerolimit){
#0 2
$wilcox_test->load_data(\@fixed_0, \@fixed_2);
$prob02 = $wilcox_test->probability();
@ooo = split(/,/, $wilcox_test->probability_status()) ;
$z02 = $ooo[3];
$z02 =~ s/ //g;
}


if($nfail1==0 && $nfail3==0 && $nzero1<$zerolimit && $nzero3<$zerolimit){
#1 3
$wilcox_test->load_data(\@fixed_1, \@fixed_3);
$prob13 = $wilcox_test->probability();
@ooo = split(/,/, $wilcox_test->probability_status()) ;
$z13 = $ooo[3];
$z13 =~ s/ //g;
}


#2 3
if($nfail2==0 && $nfail3==0 && $nzero2<$zerolimit && $nzero3<$zerolimit){
$wilcox_test->load_data(\@fixed_2, \@fixed_3);
$prob23 = $wilcox_test->probability();
@ooo = split(/,/, $wilcox_test->probability_status()) ;
$z23 = $ooo[3];
$z23 =~ s/ //g;
}



#hs1A compared to hs1B (0 1)
#hs1A compared to hs1C (0 2) 
#hs1B compared to hs1D (1 3)
#hs1C compared to hs1D (2 3)
#print "$gene $d0 $d1 $prob0 $d2 $d3 $prob23\n";
print "$nzero0 $nzero1 $nzero2 $nzero3\n";
print "$gene $info[0]_$info[1] $z01 $prob01 $info[0]_$info[2] $z02 $prob02 $info[1]_$info[3] $z13 $prob13 $info[2]_$info[3] $z23 $prob23 \n";


}






