00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <Sequence/PolyTable.hpp>
00025 #include <Sequence/stateCounter.hpp>
00026 #include <iostream>
00027 #include <cctype>
00035 namespace Sequence
00036 {
00037 PolyTable::PolyTable(const size_t & nsam, const size_t nsnps )
00038
00044 : positions( std::vector<double>(nsnps,0.) ),
00045 data( std::vector< std::string >( nsam, std::string(nsnps,' ') ) ),
00046 pv(Sequence::polySiteVector()),
00047 non_const_access(true)
00048 {}
00049
00050 PolyTable::PolyTable(PolyTable::const_site_iterator beg,
00051 PolyTable::const_site_iterator end) :
00052 pv(Sequence::polySiteVector()),
00053 non_const_access(true)
00054 {
00055 if (beg>=end)
00056 {
00057 return;
00058 }
00059 assign(beg,end);
00060 }
00061
00062 PolyTable::~PolyTable (void)
00063 {
00064 }
00065
00066 bool PolyTable::empty() const
00067 {
00068 return data.empty()||positions.empty();
00069 }
00070
00071 bool PolyTable::assign(PolyTable::const_site_iterator beg,
00072 PolyTable::const_site_iterator end)
00073
00083 {
00084 non_const_access = true;
00085 positions.clear();
00086 data.clear();
00087 if(std::distance(beg,end) < 1) return true;
00088 positions.resize(end-beg);
00089 pv.resize(end-beg);
00090 data.resize(beg->second.length());
00091 size_t nsam = beg->second.length(),i=0,j=0;
00092 std::string::const_iterator sb,se;
00093 while((beg+i)<end)
00094 {
00095 pv[i]=*(beg+i);
00096 if ((beg+i)->second.length() != nsam)
00097 {
00098
00099 positions.clear();
00100 data.clear();
00101 return false;
00102 }
00103 positions[i] = (beg+i)->first;
00104 sb = (beg+i)->second.begin();
00105 se = (beg+i)->second.end();
00106 j = 0;
00107 while( (sb+j) < se )
00108 {
00109 data[j] += *(sb+j);
00110 ++j;
00111 }
00112 ++i;
00113 }
00114 non_const_access = false;
00115 return true;
00116 }
00117
00118 bool PolyTable::operator==(const PolyTable &rhs) const
00119 {
00120 if (positions.size() != rhs.positions.size()
00121 || data.size() != rhs.data.size())
00122 return false;
00123
00124 for(unsigned i = 0 ; i < positions.size() ; ++i)
00125 if (positions[i] != rhs.positions[i])
00126 return false;
00127
00128 for(unsigned i = 0 ; i < data.size() ; ++i)
00129 if (data[i] != rhs.data[i])
00130 return false;
00131
00132 return true;
00133 }
00134
00135 bool PolyTable::operator!=(const PolyTable &rhs) const
00136 {
00137 return !(*this==rhs);
00138 }
00139
00140 PolyTable::operator Sequence::polySiteVector() const
00144 {
00145 if(non_const_access==true)
00146 {
00147 pv = Sequence::rotatePolyTable(this);
00148 non_const_access=false;
00149 }
00150 return pv;
00151 }
00152
00153 PolyTable::data_iterator PolyTable::begin()
00158 {
00159 non_const_access=true;
00160 return data.begin();
00161 }
00162
00163 PolyTable::data_iterator PolyTable::end()
00168 {
00169 non_const_access=true;
00170 return data.end();
00171 }
00172
00173 PolyTable::const_data_iterator PolyTable::begin() const
00178 {
00179 return data.begin();
00180 }
00181
00182 PolyTable::const_data_iterator PolyTable::end() const
00187 {
00188 return data.end();
00189 }
00190
00191 PolyTable::pos_iterator PolyTable::pbegin()
00195 {
00196 non_const_access=true;
00197 return positions.begin();
00198 }
00199
00200 PolyTable::pos_iterator PolyTable::pend()
00204 {
00205 non_const_access=true;
00206 return positions.end();
00207 }
00208
00209 PolyTable::const_pos_iterator PolyTable::pbegin() const
00213 {
00214 return positions.begin();
00215 }
00216
00217 PolyTable::const_pos_iterator PolyTable::pend() const
00221 {
00222
00223 return positions.end();
00224 }
00225
00226 PolyTable::const_site_iterator PolyTable::sbegin() const
00232 {
00233 if(non_const_access == true)
00234 {
00235 pv = Sequence::rotatePolyTable(this);
00236 non_const_access=false;
00237 }
00238 return pv.begin();
00239 }
00240
00241 PolyTable::const_site_iterator PolyTable::send() const
00247 {
00248 if(non_const_access == true)
00249 {
00250 pv = Sequence::rotatePolyTable(this);
00251 non_const_access=false;
00252 }
00253 return pv.end();
00254 }
00255
00256 void PolyTable::ApplyFreqFilter(unsigned mincount,bool haveOutgroup,
00257 unsigned outgroup)
00268 {
00269 std::vector<double> newpos;
00270 std::vector<std::string> newdata(data.size());
00271
00272 for(unsigned i = 0 ; i < positions.size() ; ++i)
00273 {
00274 stateCounter Counts;
00275 for (unsigned j = 0 ; j < data.size() ; ++j)
00276 {
00277 if(haveOutgroup==false||
00278 (haveOutgroup==true && j != outgroup))
00279 {
00280 Counts(data[j][i]);
00281 }
00282 }
00283 bool freq = true;
00284 if (Counts.a > 0 && Counts.a < mincount)
00285 {
00286 freq = false;
00287 }
00288 else if (freq == true && Counts.g > 0 && Counts.g < mincount)
00289 {
00290 freq = false;
00291 }
00292 else if (freq == true && Counts.c > 0 && Counts.c < mincount)
00293 {
00294 freq = false;
00295 }
00296 else if (freq == true && Counts.t > 0 && Counts.t < mincount)
00297 {
00298 freq = false;
00299 }
00300 else if (freq == true && Counts.zero > 0 && Counts.zero < mincount)
00301 {
00302 freq = false;
00303 }
00304 else if (freq == true && Counts.one > 0 && Counts.one < mincount)
00305 {
00306 freq = false;
00307 }
00308 if(freq == true)
00309 {
00310 newpos.push_back(positions[i]);
00311 for(unsigned j = 0 ; j < data.size() ; ++j)
00312 {
00313 newdata[j] += data[j][i];
00314 }
00315 }
00316 }
00317
00318 assign(&newpos[0],newpos.size(),&newdata[0],newdata.size());
00319 }
00320
00321 void PolyTable::RemoveMultiHits(bool skipOutgroup,
00322 unsigned outgroup)
00332 {
00333 std::vector<double> newpos;
00334 std::vector<std::string> newdata(data.size());
00335
00336 for(unsigned i = 0 ; i < positions.size() ; ++i)
00337 {
00338 stateCounter Counts;
00339 for (unsigned j = 0 ; j < data.size() ; ++j)
00340 {
00341 if((skipOutgroup==false)||
00342 (skipOutgroup==true && j != outgroup))
00343 {
00344 Counts(data[j][i]);
00345 }
00346 }
00347 if(Counts.nStates() <= 2)
00348 {
00349 newpos.push_back(positions[i]);
00350 for(unsigned j = 0 ; j < data.size() ; ++j)
00351 {
00352 newdata[j] += data[j][i];
00353 }
00354 }
00355 }
00356
00357 assign(&newpos[0],newpos.size(),&newdata[0],newdata.size());
00358 }
00359
00360 void PolyTable::RemoveMissing(bool skipOutgroup,
00361 unsigned outgroup)
00369 {
00370 std::vector<double> newpos;
00371 std::vector<std::string> newdata(data.size());
00372
00373 for(unsigned i = 0 ; i < positions.size() ; ++i)
00374 {
00375 bool hasMissing=false;
00376 for (unsigned j = 0 ; j < data.size() ; ++j)
00377 {
00378 if((skipOutgroup==false)||
00379 (skipOutgroup==true && j != outgroup))
00380 {
00381 switch(char(std::toupper(data[j][i])))
00382 {
00383 case 'N':
00384 hasMissing=true;
00385 break;
00386 }
00387 }
00388 }
00389 if(hasMissing==false)
00390 {
00391 newpos.push_back(positions[i]);
00392 for(unsigned j = 0 ; j < data.size() ; ++j)
00393 {
00394 newdata[j] += data[j][i];
00395 }
00396 }
00397 }
00398
00399 assign(&newpos[0],newpos.size(),&newdata[0],newdata.size());
00400 }
00401
00402 void PolyTable::RemoveAmbiguous(bool skipOutgroup,
00403 unsigned outgroup)
00411 {
00412 std::vector<double> newpos;
00413 std::vector<std::string> newdata(data.size());
00414
00415 for(unsigned i = 0 ; i < positions.size() ; ++i)
00416 {
00417 stateCounter c;
00418 for (unsigned j = 0 ; j < data.size() ; ++j)
00419 {
00420 if((skipOutgroup==false)||
00421 (skipOutgroup==true && j != outgroup))
00422 {
00423 c(data[j][i]);
00424 }
00425 }
00426 if (c.ndna == 0)
00427 {
00428 newpos.push_back(positions[i]);
00429 for(unsigned j = 0 ; j < data.size() ; ++j)
00430 {
00431 newdata[j] += data[j][i];
00432 }
00433 }
00434 }
00435
00436 assign(&newpos[0],newpos.size(),&newdata[0],newdata.size());
00437 }
00438
00439 void
00440 PolyTable::Binary (bool haveOutgroup, unsigned outgroup, bool strictInfSites )
00452 {
00453 unsigned nsites = this->numsites();
00454
00455 std::vector<double> newpositions;
00456 std::vector<unsigned> _pos_indexes;
00457 std::vector<std::string> newdata((*this).size());
00458
00459 std::string _outgroup;
00460 if(haveOutgroup == true)
00461 {
00462 _outgroup = (*this)[outgroup];
00463 }
00464 else
00465 {
00466 _outgroup = (*this)[0];
00467 }
00468
00469 for(unsigned j =0 ; j < nsites ; ++j)
00470 {
00471 stateCounter Counts;
00472 for(unsigned i = 0 ; i < (*this).size() ; ++i)
00473 {
00474 Counts ((*this)[i][j]);
00475 }
00476 if(Counts.nStates()==2)
00477 {
00478 newpositions.push_back(this->position(j));
00479 _pos_indexes.push_back(j);
00480 }
00481 }
00482
00483 for(unsigned i = 0 ; i < (*this).size() ; ++i)
00484 {
00485 for(unsigned j = 0 ; j < _pos_indexes.size() ; ++j)
00486 {
00487 if(haveOutgroup==false ||
00488 (haveOutgroup==true && _outgroup[j] != 'N'))
00489
00490
00491 {
00492 if ( (*this)[i][_pos_indexes[j]] == 'N')
00493 {
00494
00495 newdata[i] += 'N';
00496 }
00497 else if( (*this)[i][_pos_indexes[j]]
00498 != _outgroup[_pos_indexes[j]] )
00499 {
00500
00501 newdata[i] += '1';
00502 }
00503 else if ( (*this)[i][_pos_indexes[j]]
00504 == _outgroup[_pos_indexes[j]] )
00505 {
00506
00507 newdata[i] += '0';
00508 }
00509
00510 }
00511 }
00512 }
00513
00514 assign(&newpositions[0],newpositions.size(),&newdata[0],newdata.size());
00515 }
00516
00517
00518 std::vector < double >
00519 PolyTable::GetPositions (void) const
00523 {
00524 return positions;
00525 }
00526
00527 std::vector <std::string > PolyTable::GetData (void) const
00534 {
00535 return data;
00536 }
00537
00538 }