Data from coalescent simulations. More...
#include <Sequence/SimData.hpp>
Public Types | |
| typedef std::string & | reference |
| typedef const std::string & | const_reference |
|
typedef std::vector < std::string >::size_type | size_type |
| typedef std::vector < std::string >::iterator | data_iterator |
| typedef std::vector < std::string > ::const_iterator | const_data_iterator |
| typedef std::vector< double > ::iterator | pos_iterator |
| typedef std::vector< double > ::const_iterator | const_pos_iterator |
| typedef Sequence::polySiteVector::const_iterator | const_site_iterator |
Public Member Functions | |
| SimData (const size_t &nsam=0, const size_t &nsnps=0) | |
| SimData (double *pos, char **sample, int nsam, int S) | |
| SimData (const std::vector< double > &pos, const std::vector< std::string > &data) | |
| SimData (const SimData::const_site_iterator beg, const SimData::const_site_iterator end) | |
| virtual std::istream & | read (std::istream &s) throw () |
| virtual std::ostream & | print (std::ostream &o) const |
| void | Binary (bool haveOutgroup=false, unsigned outgroup=0, bool strictInfSites=true) |
| virtual int | fromfile (FILE *openedfile) |
| int | segsites (void) const |
| data_iterator | begin () |
| const_data_iterator | begin () const |
| data_iterator | end () |
| const_data_iterator | end () const |
| pos_iterator | pbegin () |
| const_pos_iterator | pbegin () const |
| pos_iterator | pend () |
| const_pos_iterator | pend () const |
| const_site_iterator | sbegin () const |
| const_site_iterator | send () const |
| std::vector< double > | GetPositions (void) const |
| std::vector< std::string > | GetData (void) const |
| virtual void | ApplyFreqFilter (unsigned mincount, bool haveOutgroup=false, unsigned outgroup=0) |
| virtual void | RemoveMultiHits (bool skipOutgroup=false, unsigned outgroup=0) |
| virtual void | RemoveMissing (bool skipOutgroup=false, unsigned outgroup=0) |
| virtual void | RemoveAmbiguous (bool skipOutgroup=false, unsigned outgroup=0) |
| virtual bool | operator== (const PolyTable &rhs) const |
| virtual bool | operator!= (const PolyTable &rhs) const |
| operator Sequence::polySiteVector () const | |
| const_reference | operator[] (const size_type &i) const |
| reference | operator[] (const size_type &i) |
| bool | empty () const |
| bool | assign (PolyTable::const_site_iterator beg, PolyTable::const_site_iterator end) |
| template<typename numeric_type , typename string_type > | |
| bool | assign (const numeric_type *_positions, const size_t &_num_positions, const string_type *_data, const size_t &_num_individuals) |
| size_type | size (void) const |
| double | position (const std::vector< double >::size_type &i) const |
| unsigned | numsites (void) const |
Data from coalescent simulations.
A class for reading in data from ms. ms is Dick Hudson's coalescent simulation program, available from http://home.uchicago.edu/~rhudson1/. This class reads in the data one record at a time. As of the time this document was written, the output of Hudson's program for one run looks like:
segsites: 10
positions: 0.2512 0.3212 0.3449 0.4386 0.4571 0.4842 0.5745 0.6333 0.7042 0.9928
0100010010
0000000100
0100010011
0000000000
0100010010
0101010010
0100010011
0110010010
1100111010
0000000000
Look at the example program tajd.cc for an example of how to pipe simulated data into a C++ program using these libraries.
Examples: bottleneck.cc, fragments.cc, ms--.cc, and msstats.cc.
Definition at line 69 of file SimData.hpp.
typedef std::vector<std::string>::const_iterator Sequence::PolyTable::const_data_iterator [inherited] |
const iterator to the data
Definition at line 90 of file PolyTable.hpp.
typedef std::vector<double>::const_iterator Sequence::PolyTable::const_pos_iterator [inherited] |
const iterator to the positions
Definition at line 98 of file PolyTable.hpp.
typedef Sequence::polySiteVector::const_iterator Sequence::PolyTable::const_site_iterator [inherited] |
Const iterator to segregating sites. The value type of this iterator is const std::pair<double,std::string>, where the double is the position of the segregating site, and the string the list of states at the site. The first character in the string corresponds to the state of the first character in the PolyTable (i.e. (*this)[0]), etc.
Definition at line 107 of file PolyTable.hpp.
typedef std::vector<std::string>::iterator Sequence::PolyTable::data_iterator [inherited] |
non-const iterator to the data
Definition at line 86 of file PolyTable.hpp.
typedef std::vector<double>::iterator Sequence::PolyTable::pos_iterator [inherited] |
non-const iterator to the positions
Definition at line 94 of file PolyTable.hpp.
| Sequence::SimData::SimData | ( | const size_t & | nsam = 0, |
|
| const size_t & | nsnps = 0 | |||
| ) | [explicit] |
The constructor needs to know the sample size simulated. This is easily obtained using Sequence::SimParams. An example of this is found in the example file tajd.cc.
Definition at line 32 of file SimData.cc.
| void Sequence::PolyTable::ApplyFreqFilter | ( | unsigned | mincount, | |
| bool | haveOutgroup = false, |
|||
| unsigned | outgroup = 0 | |||
| ) | [virtual, inherited] |
Go through the data and remove all positions where there is a variant whose count (number of occurrences in the sample) is < mincount.
| mincount | minimum count of a variant in the data. Variants that occur < mincount times are thrown out. | |
| haveOutgroup | true if an outgroup is present in the data, false otherwise | |
| outgroup | the index in the data array containing the outgroup (if present) |
Definition at line 256 of file PolyTable.cc.
| bool Sequence::PolyTable::assign | ( | const numeric_type * | _positions, | |
| const size_t & | _num_positions, | |||
| const string_type * | _data, | |||
| const size_t & | _num_individuals | |||
| ) | [inline, inherited] |
Assign SNP data to the polymorphism table from a vector/array.
| _positions | an array representing the positions of the SNPs | |
| _num_positions | the number of elements in _positions | |
| _data | an array containing the characters for each SNP in each individual | |
| _num_individuals | the number of elements in _data |
Sequence::PolySites snpTable;
std::vector<double> positions;
std::vector<std::string> data;
//fill positions and data...
if ( snpTable.assign(&positions[0],positions.size(),&data[0],data.size()) == true )
  {
    //ok
  }
else
  {
    //assignment failed for some reason...
  }
Definition at line 34 of file PolyTable.tcc.
| bool Sequence::PolyTable::assign | ( | PolyTable::const_site_iterator | beg, | |
| PolyTable::const_site_iterator | end | |||
| ) | [inherited] |
Assignment operation, allowing a range of polymorphic sites to be assigned to a polymorphism table. This exists mainly for two purposes. One is the ability to assign tables from "slices" of other tables. The second is to facilitate the writing of "sliding window" routines.
Definition at line 71 of file PolyTable.cc.
| PolyTable::const_data_iterator Sequence::PolyTable::begin | ( | ) | const [inherited] |
Definition at line 173 of file PolyTable.cc.
| PolyTable::data_iterator Sequence::PolyTable::begin | ( | ) | [inherited] |
Definition at line 153 of file PolyTable.cc.
| void Sequence::SimData::Binary | ( | bool | haveOutgroup = false, |
|
| unsigned | outgroup = 0, |
|||
| bool | strictInfSites = true | |||
| ) | [inline, virtual] |
Recode the polymorphism table in 0,1 (binary notation)
| haveOutgroup | use true if an outgroup is present, false otherwise | |
| outgroup | the index of the outgroup in the data vector used to construct the object | |
| strictInfSites | if true, throw out all sites with > 2 character states (including outgroup!) |
Note: If haveOutgroup == true, then 0 means an ancestral state and 1 a derived state in the resulting table. Note: If haveOutgroup == true, and there are sites with missing data in the outgroup sequence, those sites are removed from the data, since it's assumed you actually want to know ancestral/derived for every site. Reimplemented from Sequence::PolyTable.
Definition at line 89 of file SimData.hpp.
| bool Sequence::PolyTable::empty | ( | ) | const [inherited] |
Definition at line 66 of file PolyTable.cc.
| PolyTable::const_data_iterator Sequence::PolyTable::end | ( | ) | const [inherited] |
Definition at line 182 of file PolyTable.cc.
| PolyTable::data_iterator Sequence::PolyTable::end | ( | ) | [inherited] |
Definition at line 163 of file PolyTable.cc.
| int Sequence::SimData::fromfile | ( | FILE * | openedfile | ) | [virtual] |
In practice, simulation analysis is I/O intensive. This method provides a routine to read in objects of type Sequence::SimData using C-style I/O routines, rather than the C++ operator>>. This can result in huge efficiency gains.
Definition at line 134 of file SimData.cc.
| std::vector< std::string > Sequence::PolyTable::GetData | ( | void | ) | const [inherited] |
Returns PolyTable::data, a vector of std::strings containing polymorphic sites. Assuming the vector is returned to a vector<string> called data, accessing data[i][j] accesses the j-th site of the i-th sequence
Definition at line 527 of file PolyTable.cc.
| std::vector< double > Sequence::PolyTable::GetPositions | ( | void | ) | const [inherited] |
Returns PolyTable::positions.
Definition at line 519 of file PolyTable.cc.
| unsigned Sequence::PolyTable::numsites | ( | void | ) | const [inline, inherited] |
Return how many positions are stored in PolyTable::positions
Definition at line 233 of file PolyTable.hpp.
| Sequence::PolyTable::operator Sequence::polySiteVector | ( | ) | const [inherited] |
allow (implicit) typecast of Sequence::PolyTable to Sequence::polySiteVector
Definition at line 140 of file PolyTable.cc.
| reference Sequence::PolyTable::operator[] | ( | const size_type & | i | ) | [inline, inherited] |
Return the i-th element of PolyTable::data.
Definition at line 160 of file PolyTable.hpp.
| const_reference Sequence::PolyTable::operator[] | ( | const size_type & | i | ) | const [inline, inherited] |
Return the i-th element of PolyTable::data.
Definition at line 150 of file PolyTable.hpp.
| PolyTable::const_pos_iterator Sequence::PolyTable::pbegin | ( | ) | const [inherited] |
Definition at line 209 of file PolyTable.cc.
| PolyTable::pos_iterator Sequence::PolyTable::pbegin | ( | ) | [inherited] |
Definition at line 191 of file PolyTable.cc.
| PolyTable::const_pos_iterator Sequence::PolyTable::pend | ( | ) | const [inherited] |
Definition at line 217 of file PolyTable.cc.
| PolyTable::pos_iterator Sequence::PolyTable::pend | ( | ) | [inherited] |
Definition at line 200 of file PolyTable.cc.
| double Sequence::PolyTable::position | ( | const std::vector< double >::size_type & | i | ) | const [inline, inherited] |
Return the i-th position from the PolyTable::positions.
Definition at line 223 of file PolyTable.hpp.
| std::ostream & Sequence::SimData::print | ( | std::ostream & | o | ) | const [virtual] |
| std::istream & Sequence::SimData::read | ( | std::istream & | stream | ) | throw () [virtual] |
A call to istream Sequence::operator>> on an object of type SimData results in a call to this function. NOTE: This is the C++ way to read in SimData, but it's the slow way.
Implements Sequence::PolyTable.
Definition at line 48 of file SimData.cc.
| void Sequence::PolyTable::RemoveAmbiguous | ( | bool | skipOutgroup = false, |
|
| unsigned | outgroup = 0 | |||
| ) | [virtual, inherited] |
go through the data and remove all the sites with states other than {A,G,C,T,N,-}
| skipOutgroup | default is false. If true, the character state of the outgroup is ignored. | |
| outgroup | the index of the outgroup in the data vector |
Definition at line 402 of file PolyTable.cc.
| void Sequence::PolyTable::RemoveMissing | ( | bool | skipOutgroup = false, |
|
| unsigned | outgroup = 0 | |||
| ) | [virtual, inherited] |
go through the data and remove all the sites with missing data (the character N).
| skipOutgroup | default is false. If true, the character state of the outgroup is ignored. | |
| outgroup | the index of the outgroup in the data vector |
Definition at line 360 of file PolyTable.cc.
| void Sequence::PolyTable::RemoveMultiHits | ( | bool | skipOutgroup = false, |
|
| unsigned | outgroup = 0 | |||
| ) | [virtual, inherited] |
Go through the data and remove all the sites with more than 2 states segregating. By default, this routine also removes sites where there are 2 states segregating in the ingroup and the outgroup (if present) has a 3rd state.
| skipOutgroup | default is false. If true, the character state of the outgroup is ignored. | |
| outgroup | the index of the outgroup in the data vector |
Definition at line 321 of file PolyTable.cc.
| PolyTable::const_site_iterator Sequence::PolyTable::sbegin | ( | ) | const [inherited] |
Definition at line 226 of file PolyTable.cc.
| int Sequence::SimData::segsites | ( | void | ) | const [inline] |
Returns the number of segregating sites in the data block.
Definition at line 97 of file SimData.hpp.
| PolyTable::const_site_iterator Sequence::PolyTable::send | ( | ) | const [inherited] |
Definition at line 241 of file PolyTable.cc.
| size_type Sequence::PolyTable::size | ( | void | ) | const [inline, inherited] |
Return how many std::strings are stored in PolyTable::data.
Definition at line 214 of file PolyTable.hpp.
1.6.3