00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <cmath>
00025 #include <cfloat>
00026 #include <cassert>
00027 #include <cstdlib>
00028 #include <Sequence/SimData.hpp>
00029 #include <Sequence/Recombination.hpp>
00030 #include <Sequence/PolySIM.hpp>
00031 #include <Sequence/SeqConstants.hpp>
00032 #include <Sequence/PolySNPimpl.hpp>
00037 using namespace Sequence::Recombination;
00038
00039 namespace Sequence
00040 {
00041 PolySIM::PolySIM (const Sequence::SimData * data):
00042 PolySNP(data,false,0,true)
00046 {
00047 rep->_NumPoly = NumPoly();
00048 }
00049
00050 PolySIM::~PolySIM(void)
00051 {
00052 }
00053
00054 double
00055 PolySIM::ThetaPi (void)
00060 {
00061 if (rep->_know_pi == false)
00062 {
00063 double Pi = 0.0, nsam = double(rep->_nsam);
00064 for( std::vector<stateCounter>::const_iterator i = rep->_counts.begin() ;
00065 i != rep->_counts.end() ; ++i)
00066 {
00067 Pi += (2.0 * i->one *(nsam-i->one)) / (nsam*(nsam-1.));
00068 }
00069 rep->_pi = Pi;
00070 rep->_know_pi = true;
00071 }
00072 return rep->_pi;
00073 }
00074
00075 double
00076 PolySIM::ThetaW (void)
00082 {
00083 return (rep->_NumPoly>0) ? (double(rep->_NumPoly)/a_sub_n() ) : 0.;
00084 }
00085
00086
00087 double
00088 PolySIM::ThetaH (void)
00094 {
00095 double H = 0.0, nsam = double(rep->_nsam);
00096
00097 for( std::vector<stateCounter>::const_iterator i = rep->_counts.begin();
00098 i != rep->_counts.end() ; ++i)
00099 {
00100 H += (i->one < nsam) ? (2.0 * i->one * i->one)/ (nsam * (nsam - 1.0)) : 0.;
00101 }
00102 return H;
00103 }
00104
00105 double
00106 PolySIM::ThetaL (void)
00112 {
00113 if(rep->_NumPoly ==0) return 0.;
00114 const char state = '1';
00115 unsigned site;
00116 unsigned seq;
00117 int num_changes;
00118 double thetal = 0.0, nc, nsam = double(rep->_nsam);
00119
00120 for (site = 0; site < rep->_data->numsites (); ++site)
00121 {
00122 for (seq = 0, num_changes = 0; seq < unsigned (nsam); ++seq)
00123 {
00124 num_changes += (*rep->_data)[seq][site] == state ? 1 : 0;
00125 }
00126 nc = double (num_changes);
00127 thetal += nc / (nsam - 1.0);
00128 }
00129 return thetal;
00130 }
00131
00132 double
00133 PolySIM::TajimasD (void)
00134 {
00135 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00136 double Pi = ThetaPi ();
00137 double W = ThetaW ();
00138 return ((Pi - W) / Dnominator ());
00139 }
00140
00141 double
00142 PolySIM::Hprime (bool likeThorntonAndolfatto)
00147 {
00148 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00149 double Hpr = 0.0;
00150 double pi = ThetaPi ();
00151 double theta = ThetaW();
00152 double thetal = ThetaL();
00153 double a = a_sub_n ();
00154 double b = b_sub_n ();
00155 double b_n_plus1 = b_sub_n_plus1();
00156 double S = rep->_NumPoly;
00157 double thetasq = (likeThorntonAndolfatto == false) ? S * (S-1)/(a*a + b) : theta*theta;
00158
00159 double vThetal =
00160 (rep->_nsam * theta)/(2.0 * (rep->_nsam - 1.0))
00161 + (2.0 * pow(rep->_nsam/(rep->_nsam - 1.0), 2.0) * (b_n_plus1 - 1.0) - 1.0) * thetasq;
00162
00163 double vPi =
00164 (3.0 * rep->_nsam *(rep->_nsam + 1.0) * theta
00165 + 2.0 * ( rep->_nsam * rep->_nsam + rep->_nsam + 3.0) * thetasq )
00166 / (9 * rep->_nsam * (rep->_nsam -1.0));
00167
00168 double cov =
00169 (rep->_nsam + 1.0) * theta / (3.0 * (rep->_nsam - 1.0))
00170 + ( 7.0 * rep->_nsam * rep->_nsam + 3.0 * rep->_nsam - 2.0 - 4.0 *
00171 rep->_nsam *( rep->_nsam + 1.0) * b_n_plus1)
00172 * thetasq / (2.0 * pow ((rep->_nsam - 1.0), 2.0));
00173
00174 Hpr = pi - thetal;
00175 Hpr /= pow ( (vThetal + vPi - 2.0 * cov), 0.5);
00176 return (Hpr);
00177
00178 }
00179
00180 double
00181 PolySIM::Dnominator (void)
00182 {
00183 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00184 double S = rep->_NumPoly;
00185 double a1, a2, b1, b2, c1, c2, e1, e2;
00186
00187 a1 = a_sub_n ();
00188 a2 = b_sub_n ();
00189 b1 = (rep->_nsam + 1.0) / (3.0 * (rep->_nsam - 1.0));
00190 b2 = (2.0 * (pow (rep->_nsam, 2.0) + rep->_nsam + 3.0)) / (9.0 * rep->_nsam *
00191 (rep->_nsam - 1.0));
00192 c1 = b1 - 1.0 / a1;
00193 c2 = b2 - (rep->_nsam + 2.0) / (a1 * rep->_nsam) + a2 / pow (a1, 2.0);
00194 e1 = c1 / a1;
00195 e2 = c2 / (pow (a1, 2.0) + a2);
00196 double denominator = pow ((e1 * S + e2 * S * (S - 1.0)), 0.5);
00197 return (denominator);
00198 }
00199
00200
00201 int
00202 PolySIM::HudsonsHaplotypeTest (int subsize, int subss)
00212 {
00213 int *subslist, i, npoly, seq;
00214 bool sflag, flag;
00215 flag = 0;
00216 sflag = 1;
00217
00218 subslist = new int[subsize];
00219
00220 for (i = 0; i < subsize; ++i)
00221 subslist[i] = i;
00222
00223 while (sflag)
00224 {
00225 npoly = poly (subslist, rep->_nsites,
00226 subsize, subss, &seq);
00227 if (npoly > subss)
00228 sflag = nextsample (subslist, subsize, int(rep->_nsam), seq);
00229 else
00230 {
00231 flag = 1;
00232 break;
00233 }
00234 }
00235 if (flag == 1)
00236 {
00237 delete[]subslist;
00238 return (1);
00239 }
00240 else
00241 {
00242 delete[]subslist;
00243 return (0);
00244 }
00245 }
00246
00247 int
00248 PolySIM::poly (int *subslist, int ss,
00249 int subsize, int subss, int *seq)
00257 {
00258 int npoly = 0, i, j, *polyvector;
00259
00260 polyvector = new int[ss];
00261 for (i = 0; i < ss; ++i)
00262 polyvector[i] = 0;
00263
00264 for (i = 1; i < subsize; ++i)
00265 for (j = 0; j < ss; ++j)
00266 if ((*rep->_data)[subslist[i]][j] !=
00267 (*rep->_data)[subslist[0]][j])
00268 {
00269 if (polyvector[j] == 0)
00270 {
00271 polyvector[j] = 1;
00272 npoly++;
00273 if (npoly == subss + 1)
00274 *seq = i;
00275 }
00276 }
00277 delete [] polyvector;
00278 return (npoly);
00279 }
00280
00281 int
00282 PolySIM::nextsample (int *subslist, int subsize, int nsam, int seq)
00290 {
00291 int i;
00292 subslist[seq]++;
00293 if (subslist[seq] > nsam - subsize + seq)
00294 {
00295 seq--;
00296
00297 if (seq < 0)
00298 return (0);
00299
00300 return (nextsample (subslist, subsize, nsam, seq));
00301 }
00302
00303 for (i = seq + 1; i < subsize; i++)
00304 subslist[i] = subslist[i - 1] + 1;
00305 return (1);
00306 }
00307
00308 double
00309 PolySIM::FuLiD (void)
00310 {
00311 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00312 double D = 0.0;
00313 double ExternalMutations = double (NumExternalMutations ());
00314 double NumMut = double (NumMutations ());
00315 double a = a_sub_n ();
00316 double b = b_sub_n ();
00317 double c = c_sub_n ();
00318
00319 double vD = 1.0 +
00320 (pow (a, 2.0) / (b + pow (a, 2.0)) *
00321 (c - (rep->_nsam + 1.0) / (rep->_nsam - 1.0)));
00322 double uD = a - 1.0 - vD;
00323 D = NumMut - a * double (ExternalMutations);
00324 D /= pow ((uD * NumMut + vD * pow (NumMut, 2.0)), 0.5);
00325 return (D);
00326 }
00327
00328 double
00329 PolySIM::FuLiF (void)
00330 {
00331 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00332 double F = 0.0;
00333 double Pi = ThetaPi ();
00334 double NumMut = double (NumMutations ());
00335 double ExternalMutations = double (NumExternalMutations ());
00336 double a = a_sub_n ();
00337 double a_n_plus1 = a_sub_n_plus1 ();
00338 double b = b_sub_n ();
00339 double c = c_sub_n ();
00340 double vF = c + 2.0 * (pow (rep->_nsam, 2.0) + rep->_nsam +
00341 3.0) / (9.0 * rep->_nsam * (double (rep->_nsam - 1.0)));
00342 vF -= (2.0 / (rep->_nsam - 1.0));
00343 vF /= (pow (a, 2.0) + b);
00344
00345 double uF = 1.0 + (rep->_nsam + 1.0) / (3.0 * (double (rep->_nsam - 1.0)));
00346 uF -= 4.0 * ((rep->_nsam + 1.0) / (pow (rep->_nsam - 1.0, 2.0))) *
00347 (a_n_plus1 - 2.0 * rep->_nsam / (rep->_nsam + 1.0));
00348 uF /= a;
00349 uF -= vF;
00350
00351 F = Pi - ExternalMutations;
00352 F /= pow (uF * NumMut + vF * pow (NumMut, 2.0), 0.5);
00353 return (F);
00354 }
00355
00356 double
00357 PolySIM::FuLiDStar (void)
00358 {
00359 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00360 double DStar = 0.0;
00361 double Singletons = double (NumSingletons ());
00362 double NumMut = double (NumMutations ());
00363
00364 double a = a_sub_n ();
00365 double b = b_sub_n ();
00366 double d = d_sub_n ();
00367
00368 double vD = pow (rep->_nsam / (rep->_nsam - 1.0), 2.0) * b;
00369 vD += pow (a, 2.0) * d;
00370 vD -= 2.0 * (rep->_nsam * a * (a + 1.0)) /
00371 (pow (double (rep->_nsam - 1.0), 2.0));
00372 vD /= (pow (a, 2.0) + b);
00373
00374 double uD =
00375 (rep->_nsam / (rep->_nsam - 1.0)) * (a -
00376 (rep->_nsam / (rep->_nsam - 1.0))) - vD;
00377
00378 DStar = (rep->_nsam / (rep->_nsam - 1.0)) * NumMut - a * double (Singletons);
00379 DStar /= pow (uD * NumMut + vD * pow (NumMut, 2.0), 0.5);
00380 return (DStar);
00381 }
00382
00383 double
00384 PolySIM::FuLiFStar (void)
00385 {
00386 if(rep->_NumPoly ==0) return strtod("NAN",NULL);
00387 double FStar = 0.0;
00388 double Singletons = double (NumSingletons ());
00389 double Pi = ThetaPi ();
00390 double NumMut = double (NumMutations ());
00391
00392 double a = a_sub_n ();
00393 double a_n_plus1 = a_sub_n_plus1 ();
00394 double b = b_sub_n ();
00395
00396
00397 double vF = 2.0 * pow (rep->_nsam, 3.0) + 110.0 * pow (rep->_nsam, 2.0) - 255.0 * rep->_nsam + 153.0;
00398 vF /= (9.0 * pow (rep->_nsam, 2.0) * (rep->_nsam - 1.0));
00399 vF += (((2.0 * (rep->_nsam - 1.0) * a) / pow (rep->_nsam, 2.0)) - (8.0 * b / rep->_nsam));
00400 vF /= (pow (a, 2.0) + b);
00401
00402 double uF = (4.0 * pow (rep->_nsam, 2.0) + 19.0 * rep->_nsam + 3.0 - 12.0 * (rep->_nsam + 1.0) * a_n_plus1);
00403 uF /= (3.0 * rep->_nsam * (rep->_nsam - 1.0));
00404 uF /= a;
00405 uF -= vF;
00406 FStar = Pi - (((rep->_nsam - 1.0) / rep->_nsam)) * double (Singletons);
00407 FStar /= pow ((uF * NumMut + vF * pow (NumMut, 2.0)), 0.5);
00408 return (FStar);
00409 }
00410
00411 unsigned
00412 PolySIM::NumMutations (void)
00416 {
00417 return rep->_NumPoly;
00418 }
00419
00420
00421
00422 unsigned
00423 PolySIM::NumSingletons (void)
00429 {
00430 if (rep->_counted_singletons == false)
00431 {
00432 unsigned singletonCount=0;
00433
00434 singletonCount = 0;
00435 for(std::vector<stateCounter>::const_iterator i = rep->_counts.begin() ;
00436 i != rep->_counts.end() ; ++i)
00437 {
00438 singletonCount += (i->one == 1 || i->zero == 1) ? 1 : 0;
00439 }
00440 rep->_counted_singletons = true;
00441 rep->_singletons = singletonCount;
00442 return (singletonCount);
00443 }
00444 else
00445 return rep->_singletons;
00446
00447 return rep->_singletons;
00448 }
00449
00450
00451 unsigned
00452 PolySIM::NumExternalMutations (void)
00462 {
00463 unsigned numExternal = 0;
00464 for( std::vector<stateCounter>::const_iterator i = rep->_counts.begin() ;
00465 i != rep->_counts.end() ; ++i )
00466 {
00467 if (i->one == 1)
00468 {
00469 ++numExternal;
00470 }
00471 }
00472 return (numExternal);
00473 }
00474
00475 unsigned
00476 PolySIM::Minrec (void)
00483 {
00484 if(rep->_NumPoly < 2) return SEQMAXUNSIGNED;
00485 unsigned a, b, e, numgametes, Rmin=0,x=0;
00486 bool flag = false;
00487
00488 if (rep->_NumPoly < 2 || x >= (rep->_NumPoly - 1))
00489 return (0);
00490 for (a = x + 1; a < rep->_nsites; ++a)
00491 {
00492 for (b = (flag == false) ? x : a-1 ; b < a; ++b)
00493 {
00494 flag = false;
00495 numgametes = 0;
00496 for (e = 0; e < rep->_nsam; ++e)
00497 {
00498 if ((*rep->_data)[e][b] == '0' &&(*rep->_data)[e][a] == '0')
00499 {
00500 ++numgametes;
00501 break;
00502 }
00503 }
00504 for (e = 0; e < rep->_nsam; ++e)
00505 {
00506 if ((*rep->_data)[e][b] == '0' &&(*rep->_data)[e][a] == '1')
00507 {
00508 ++numgametes;
00509 break;
00510 }
00511 }
00512 for (e = 0; e < rep->_nsam; ++e)
00513 {
00514 if ((*rep->_data)[e][b] == '1' &&(*rep->_data)[e][a] == '0')
00515 {
00516 ++numgametes;
00517 break;
00518 }
00519 }
00520 for (e = 0; e < rep->_nsam; ++e)
00521 {
00522 if ((*rep->_data)[e][b] == '1' &&(*rep->_data)[e][a] == '1')
00523 {
00524 ++numgametes;
00525 break;
00526 }
00527 }
00528 if (numgametes == 4)
00529 {
00530 ++Rmin;
00531 flag = true;
00532 break;
00533 }
00534 }
00535 if (flag == true)
00536 {
00537 x = a;
00538 }
00539 }
00540 return Rmin;
00541 }
00542
00543 void PolySIM::WallStats(void)
00544 {
00545 unsigned n00 ,n01 ,n10, n11;
00546 unsigned nhap_curr, nhap_left;
00547 unsigned n0site1,n0site2;
00548 nhap_left = SEQMAXUNSIGNED;
00549
00550 unsigned A = 0;
00551 unsigned S = rep->_NumPoly;
00552 if (S > 1)
00553 {
00554 for (unsigned site1 = 0 ; site1 < rep->_nsites-1 ; ++site1)
00555
00556 {
00557 for (unsigned site2 = site1+1 ; site2 < rep->_nsites ; ++site2)
00558 {
00559 nhap_curr =0;
00560 n00 = n01 = n10 = n11 = n0site1 = n0site2 = 0;
00561 for (unsigned i = 0 ; i < rep->_nsam ; ++i)
00562 {
00563 switch ( (*rep->_data)[i][site1] )
00564 {
00565 case '0':
00566 ++n0site1;
00567 switch ( (*rep->_data)[i][site2] )
00568 {
00569 case '0':
00570 ++n0site2;
00571 ++n00;
00572 break;
00573 case '1':
00574 ++n01;
00575 break;
00576 }
00577 break;
00578 case '1':
00579 switch ( (*rep->_data)[i][site2] )
00580 {
00581 case '0':
00582 ++n0site2;
00583 ++n10;
00584 break;
00585 case '1':
00586 ++n11;
00587 break;
00588 }
00589 break;
00590 }
00591 }
00592
00593
00594 if ( (n0site1 > 0 && n0site1 < rep->_nsam) &&
00595 (n0site2 > 0 && n0site2 < rep->_nsam) )
00596 {
00597 nhap_curr += (n00 > 0) ? 1 : 0;
00598 nhap_curr += (n01 > 0) ? 1 : 0;
00599 nhap_curr += (n10 > 0) ? 1 : 0;
00600 nhap_curr += (n11 > 0) ? 1 : 0;
00601 if (site1 == 0)
00602 {
00603 if (nhap_curr == 2)
00604 {
00605 ++rep->_walls_Bprime;
00606 ++A;
00607 }
00608 }
00609 else
00610 {
00611 if (nhap_curr == 2)
00612 ++rep->_walls_Bprime;
00613 if (nhap_curr == 2 && nhap_left != 2)
00614 ++A;
00615 }
00616 site1=site2;
00617 }
00618 nhap_left = nhap_curr;
00619 }
00620 }
00621 rep->_walls_B = double(rep->_walls_Bprime)/(double(S-1));
00622 rep->_walls_Q = (rep->_walls_Bprime + double(A))/(double(S));
00623 }
00624 else
00625 {
00626 rep->_walls_B = strtod("NAN",NULL);
00627 rep->_walls_Bprime=0;
00628 rep->_walls_Q = strtod("NAN",NULL);
00629 }
00630 rep->_calculated_wall_stats=true;
00631 }
00632
00633 double PolySIM::WallsB(void)
00634 {
00635 if (rep->_calculated_wall_stats == false)
00636 WallStats();
00637 return rep->_walls_B;
00638 }
00639 unsigned PolySIM::WallsBprime(void)
00640 {
00641 if (rep->_calculated_wall_stats == false)
00642 WallStats();
00643 return rep->_walls_Bprime;
00644 }
00645 double PolySIM::WallsQ(void)
00646 {
00647 if (rep->_calculated_wall_stats == false)
00648 WallStats();
00649 return rep->_walls_Q;
00650 }
00651 }