{"version":5,"vars":[{"line":238,"name":"base","kind":2,"containerName":""},{"definition":"my","line":240,"kind":13,"localvar":"my","containerName":null,"name":"$progname"},{"definition":"sub","containerName":"main::","name":"next_assembly","children":[{"line":253,"containerName":"next_assembly","localvar":"my","kind":13,"name":"$self","definition":"my"},{"name":"$scaffoldobj","localvar":"my","containerName":"next_assembly","kind":13,"line":256,"definition":"my"},{"line":256,"name":"new","kind":12,"containerName":"next_assembly"},{"containerName":"next_assembly","kind":13,"name":"$progname","line":256},{"definition":"my","line":259,"containerName":"next_assembly","localvar":"my","kind":13,"name":"$contigobj"},{"definition":"my","localvar":"my","containerName":"next_assembly","kind":13,"name":"$iscontig","line":260},{"definition":"my","name":"%contiginfo","localvar":"my","kind":13,"containerName":"next_assembly","line":261},{"definition":"my","line":262,"localvar":"my","kind":13,"containerName":"next_assembly","name":"$isread"},{"definition":"my","line":263,"localvar":"my","containerName":"next_assembly","kind":13,"name":"%readinfo"},{"containerName":"next_assembly","kind":13,"name":"$self","line":266},{"name":"_readline","containerName":"next_assembly","kind":12,"line":266},{"line":270,"name":"$iscontig","kind":13,"containerName":"next_assembly"},{"line":271,"kind":13,"containerName":"next_assembly","name":"$isread"},{"kind":13,"containerName":"next_assembly","name":"$contiginfo","line":273},{"definition":"my","kind":13,"localvar":"my","containerName":"next_assembly","name":"$readobj","line":275},{"name":"$self","containerName":"next_assembly","kind":13,"line":275},{"containerName":"next_assembly","kind":12,"name":"_store_read","line":275},{"line":275,"name":"%readinfo","containerName":"next_assembly","kind":13},{"name":"$contigobj","kind":13,"containerName":"next_assembly","line":275},{"line":276,"name":"$contiginfo","containerName":"next_assembly","kind":13},{"lin
e":278,"name":"$singletobj","containerName":"next_assembly","localvar":"my","kind":13,"definition":"my"},{"line":278,"name":"$self","containerName":"next_assembly","kind":13},{"containerName":"next_assembly","kind":12,"name":"_store_singlet","line":278},{"line":278,"name":"%readinfo","kind":13,"containerName":"next_assembly"},{"line":278,"kind":13,"containerName":"next_assembly","name":"%contiginfo"},{"line":279,"kind":13,"containerName":"next_assembly","name":"$scaffoldobj"},{"name":"$self","containerName":"next_assembly","kind":13,"line":282},{"line":282,"name":"throw","kind":12,"containerName":"next_assembly"},{"line":285,"name":"%readinfo","kind":13,"containerName":"next_assembly"},{"name":"$contigobj","containerName":"next_assembly","kind":13,"line":287},{"line":288,"name":"%contiginfo","kind":13,"containerName":"next_assembly"},{"name":"$iscontig","containerName":"next_assembly","kind":13,"line":290},{"line":292,"kind":13,"containerName":"next_assembly","name":"$iscontig"},{"name":"$isread","kind":13,"containerName":"next_assembly","line":293},{"name":"$contigobj","kind":13,"containerName":"next_assembly","line":295},{"containerName":"next_assembly","kind":13,"name":"$self","line":295},{"line":295,"name":"_store_contig","containerName":"next_assembly","kind":12},{"line":295,"containerName":"next_assembly","kind":13,"name":"%contiginfo"},{"line":295,"name":"$contigobj","containerName":"next_assembly","kind":13},{"name":"$scaffoldobj","containerName":"next_assembly","kind":13,"line":296},{"line":296,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"line":297,"name":"$isread","containerName":"next_assembly","kind":13},{"line":300,"containerName":"next_assembly","kind":13,"name":"$iscontig"},{"kind":13,"containerName":"next_assembly","name":"$isread","line":301},{"name":"$contiginfo","containerName":"next_assembly","kind":13,"line":303},{"definition":"my","containerName":"next_assembly","localvar":"my","kind":13,"name":"$readobj","line":305},{"line
":305,"containerName":"next_assembly","kind":13,"name":"$self"},{"line":305,"kind":12,"containerName":"next_assembly","name":"_store_read"},{"kind":13,"containerName":"next_assembly","name":"%readinfo","line":305},{"line":305,"name":"$contigobj","kind":13,"containerName":"next_assembly"},{"kind":13,"containerName":"next_assembly","name":"$contiginfo","line":306},{"name":"$singletobj","kind":13,"localvar":"my","containerName":"next_assembly","line":308,"definition":"my"},{"name":"$self","containerName":"next_assembly","kind":13,"line":308},{"line":308,"kind":12,"containerName":"next_assembly","name":"_store_singlet"},{"containerName":"next_assembly","kind":13,"name":"%readinfo","line":308},{"line":309,"name":"%contiginfo","containerName":"next_assembly","kind":13},{"kind":13,"containerName":"next_assembly","name":"$scaffoldobj","line":309},{"kind":13,"containerName":"next_assembly","name":"$self","line":312},{"containerName":"next_assembly","kind":12,"name":"throw","line":312},{"name":"%readinfo","containerName":"next_assembly","kind":13,"line":315},{"line":318,"name":"$self","kind":13,"containerName":"next_assembly"},{"line":318,"name":"throw","containerName":"next_assembly","kind":12},{"name":"$iscontig","containerName":"next_assembly","kind":13,"line":321},{"line":323,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"line":324,"name":"$contiginfo","containerName":"next_assembly","kind":13},{"line":325,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":326},{"line":327,"name":"$contiginfo","containerName":"next_assembly","kind":13},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":328},{"line":329,"name":"$contiginfo","containerName":"next_assembly","kind":13},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":330},{"line":331,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"name":"$contiginfo","kind":13,"cont
ainerName":"next_assembly","line":332},{"line":333,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"line":334,"name":"$contiginfo","kind":13,"containerName":"next_assembly"},{"kind":13,"containerName":"next_assembly","name":"$contiginfo","line":335},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":336},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":337},{"containerName":"next_assembly","kind":13,"name":"$contiginfo","line":338},{"line":339,"kind":13,"containerName":"next_assembly","name":"$contiginfo"},{"line":340,"containerName":"next_assembly","kind":13,"name":"$contiginfo"},{"line":341,"name":"$contiginfo","containerName":"next_assembly","kind":13},{"name":"$self","kind":13,"containerName":"next_assembly","line":343},{"line":343,"name":"throw","containerName":"next_assembly","kind":12},{"containerName":"next_assembly","kind":13,"name":"$isread","line":346},{"line":348,"name":"$readinfo","kind":13,"containerName":"next_assembly"},{"line":349,"name":"$readinfo","kind":13,"containerName":"next_assembly"},{"line":350,"name":"$readinfo","kind":13,"containerName":"next_assembly"},{"line":351,"name":"$readinfo","containerName":"next_assembly","kind":13},{"line":352,"name":"$readinfo","kind":13,"containerName":"next_assembly"},{"name":"$readinfo","kind":13,"containerName":"next_assembly","line":353},{"name":"$readinfo","containerName":"next_assembly","kind":13,"line":354},{"line":355,"name":"$readinfo","kind":13,"containerName":"next_assembly"},{"line":356,"containerName":"next_assembly","kind":13,"name":"$readinfo"},{"name":"$readinfo","kind":13,"containerName":"next_assembly","line":357},{"name":"$self","kind":13,"containerName":"next_assembly","line":359},{"line":359,"name":"throw","kind":12,"containerName":"next_assembly"},{"name":"$self","containerName":"next_assembly","kind":13,"line":364},{"line":364,"containerName":"next_assembly","kind":12,"name":"throw"},{"line":369,"name":"$contiginfo","containerNa
me":"next_assembly","kind":13},{"line":370,"containerName":"next_assembly","kind":13,"name":"$contiginfo"},{"definition":"my","line":372,"name":"$readobj","containerName":"next_assembly","localvar":"my","kind":13},{"containerName":"next_assembly","kind":13,"name":"$self","line":372},{"containerName":"next_assembly","kind":12,"name":"_store_read","line":372},{"name":"%readinfo","kind":13,"containerName":"next_assembly","line":372},{"line":372,"name":"$contigobj","containerName":"next_assembly","kind":13},{"name":"$contiginfo","containerName":"next_assembly","kind":13,"line":373},{"localvar":"my","kind":13,"containerName":"next_assembly","name":"$singletobj","line":375,"definition":"my"},{"line":375,"name":"$self","kind":13,"containerName":"next_assembly"},{"line":375,"name":"_store_singlet","kind":12,"containerName":"next_assembly"},{"name":"%readinfo","kind":13,"containerName":"next_assembly","line":375},{"line":375,"name":"%contiginfo","kind":13,"containerName":"next_assembly"},{"line":376,"kind":13,"containerName":"next_assembly","name":"$scaffoldobj"},{"line":379,"name":"$self","kind":13,"containerName":"next_assembly"},{"line":379,"name":"throw","containerName":"next_assembly","kind":12},{"line":383,"kind":13,"containerName":"next_assembly","name":"%readinfo"},{"name":"$contigobj","containerName":"next_assembly","kind":13,"line":385},{"line":386,"containerName":"next_assembly","kind":13,"name":"%contiginfo"},{"kind":13,"containerName":"next_assembly","name":"$scaffoldobj","line":388},{"kind":12,"containerName":"next_assembly","name":"update_seq_list","line":388},{"name":"$scaffoldobj","containerName":"next_assembly","kind":13,"line":390}],"kind":12,"range":{"start":{"character":0,"line":252},"end":{"line":391,"character":9999}},"line":252},{"kind":12,"containerName":"Assembly::Scaffold","name":"Bio","line":256},{"signature":{"label":"_qual_hex2dec($self,$qual)","parameters":[{"label":"$self"},{"label":"$qual"}],"documentation":"__END__\n# $Id: tigr.pm 16123 
2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : 
string"},"kind":12,"range":{"start":{"character":0,"line":403},"end":{"character":9999,"line":408}},"line":403,"detail":"($self,$qual)","definition":"sub","name":"_qual_hex2dec","containerName":"main::","children":[{"name":"$self","containerName":"_qual_hex2dec","localvar":"my","kind":13,"line":404,"definition":"my"},{"line":404,"name":"$qual","containerName":"_qual_hex2dec","kind":13},{"kind":13,"containerName":"_qual_hex2dec","name":"$qual","line":405},{"line":406,"kind":13,"containerName":"_qual_hex2dec","name":"$qual"},{"line":407,"kind":13,"containerName":"_qual_hex2dec","name":"$qual"}]},{"definition":"sub","detail":"($self,$qual)","children":[{"definition":"my","line":421,"name":"$self","localvar":"my","containerName":"_qual_dec2hex","kind":13},{"line":421,"containerName":"_qual_dec2hex","kind":13,"name":"$qual"},{"line":422,"containerName":"_qual_dec2hex","kind":13,"name":"$qual"},{"line":423,"name":"$qual","containerName":"_qual_dec2hex","kind":13},{"name":"$qual","kind":13,"containerName":"_qual_dec2hex","line":423},{"line":424,"kind":13,"containerName":"_qual_dec2hex","name":"$qual"}],"name":"_qual_dec2hex","containerName":"main::","signature":{"label":"_qual_dec2hex($self,$qual)","documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm 
= Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute 
value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. 
>my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCG
CCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string","parameters":[{"label":"$self"},{"label":"$qual"}]},"line":420,"range":{"start":{"character":0,"line":420},"end":{"line":425,"character":9999}},"kind":12},{"children":[{"kind":13,"localvar":"my","containerName":"_store_contig","name":"$self","line":440,"definition":"my"},{"line":440,"name":"$contiginfo","kind":13,"containerName":"_store_contig"},{"kind":13,"containerName":"_store_contig","name":"$contigobj","line":440},{"line":440,"containerName":"_store_contig","kind":13,"name":"$scaffoldobj"},{"line":443,"name":"$contigobj","kind":13,"containerName":"_store_contig"},{"kind":12,"containerName":"_store_contig","name":"new","line":443},{"line":445,"name":"$progname","containerName":"_store_contig","kind":13},{"line":448,"kind":13,"containerName":"_store_contig","name":"$scaffoldobj"},{"kind":12,"containerName":"_store_contig","name":"add_contig","line":448},{"line":448,"containerName":"_store_contig","kind":13,"name":"$contigobj"},{"line":452,"name":"$consensus","localvar":"my","kind":13,"containerName":"_store_contig","definition":"my"},{"line":452,"name":"new","containerName":"_store_contig","kind":12},{"line":457,"name":"$contigobj","kind":13,"containerName":"_store_contig"},{"line":457,"kind":12,"containerName":"_store_contig","name":"set_consensus_sequence"},{"line":457,"kind":13,"containerName":"_store_contig","name":"$consensus"},{"line":460,"name":"$self","containerName":"_store_contig","kind":13},{"line":460,"kind":12,"containerName":"_store_contig","name":"_qual_hex2dec"},{"line":461,"name":"$qual","localvar":"my","kind":13,"containerName":"_store_contig","definition":"my"},{"name":"new","kind":12,"containerName":"_store_contig","line":461},{"kind":13,"containerName":"_store_contig","
name":"$contigobj","line":465},{"line":465,"name":"set_consensus_quality","kind":12,"containerName":"_store_contig"},{"name":"$qual","containerName":"_store_contig","kind":13,"line":465},{"definition":"my","name":"$contigtags","localvar":"my","kind":13,"containerName":"_store_contig","line":468},{"name":"new","kind":12,"containerName":"_store_contig","line":468},{"containerName":"_store_contig","kind":13,"name":"$contigobj","line":471},{"name":"get_consensus_length","kind":12,"containerName":"_store_contig","line":471},{"name":"$contigobj","kind":13,"containerName":"_store_contig","line":486},{"line":486,"kind":12,"containerName":"_store_contig","name":"add_features"},{"name":"$contigtags","kind":13,"containerName":"_store_contig","line":486},{"kind":13,"containerName":"_store_contig","name":"$contigobj","line":488}],"containerName":"main::","name":"_store_contig","definition":"sub","detail":"($self,$contiginfo,$contigobj,$scaffoldobj)","line":439,"range":{"end":{"line":489,"character":9999},"start":{"line":439,"character":0}},"kind":12,"signature":{"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    
$outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR 
Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists. Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nresponsive experts will be able to look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nof the bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold","parameters":[{"label":"$self"},{"label":"$contiginfo"},{"label":"$contigobj"},{"label":"$scaffoldobj"}],"label":"_store_contig($self,$contiginfo,$contigobj,$scaffoldobj)"}},{"name":"Bio","containerName":"Assembly::Contig","kind":12,"line":443},{"line":444,"name":"contiginfo","kind":12},{"line":452,"kind":12,"containerName":"LocatableSeq","name":"Bio"},{"line":453,"name":"contiginfo","kind":12},{"name":"contiginfo","kind":12,"line":454},{"line":460,"kind":12,"name":"contiginfo"},{"name":"contiginfo","kind":12,"line":460},{"line":461,"kind":12,"containerName":"Seq::Quality","name":"Bio"},{"line":462,"name":"contiginfo","kind":12},{"name":"contiginfo","kind":12,"line":463},{"containerName":"SeqFeature::Generic","kind":12,"name":"Bio","line":468},{"line":473,"kind":12,"name":"contiginfo"},{"kind":12,"name":"contiginfo","line":474},{"name":"contiginfo","kind":12,"line":475},{"line":476,"kind":12,"name":"contiginfo"},{"name":"contiginfo","kind":12,"line":477},{"kind":12,"name":"contiginfo","line":478},{"line":479,"name":"contiginfo","kind":12},{"line":480,"name":"contiginfo","kind":12},{"line":481,"kind":12,"name":"contiginfo"},{"line":482,"kind":12,"name":"
contiginfo"},{"line":483,"name":"contiginfo","kind":12},{"kind":12,"name":"contiginfo","line":484},{"definition":"sub","detail":"($self,$readinfo,$contigobj)","children":[{"definition":"my","line":502,"name":"$self","localvar":"my","kind":13,"containerName":"_store_read"},{"containerName":"_store_read","kind":13,"name":"$readinfo","line":502},{"line":502,"name":"$contigobj","kind":13,"containerName":"_store_read"},{"localvar":"my","kind":13,"containerName":"_store_read","name":"$readobj","line":507,"definition":"my"},{"name":"new","kind":12,"containerName":"_store_read","line":507},{"line":510,"name":"$self","kind":13,"containerName":"_store_read"},{"kind":12,"containerName":"_store_read","name":"_merge_seq_name_and_db","line":510},{"kind":13,"containerName":"_store_read","name":"$self","line":511},{"line":511,"kind":12,"containerName":"_store_read","name":"_merge_seq_name_and_db"},{"name":"$alncoord","kind":13,"localvar":"my","containerName":"_store_read","line":521,"definition":"my"},{"line":521,"name":"new","containerName":"_store_read","kind":12},{"name":"$readobj","kind":13,"containerName":"_store_read","line":522},{"containerName":"_store_read","kind":12,"name":"id","line":522},{"kind":13,"containerName":"_store_read","name":"$contigobj","line":526},{"line":526,"name":"id","kind":12,"containerName":"_store_read"},{"line":528,"kind":13,"containerName":"_store_read","name":"$contigobj"},{"kind":12,"containerName":"_store_read","name":"set_seq_coord","line":528},{"line":528,"name":"$alncoord","containerName":"_store_read","kind":13},{"name":"$readobj","containerName":"_store_read","kind":13,"line":528},{"line":532,"kind":13,"containerName":"_store_read","name":"$contigobj"},{"name":"change_coord","kind":12,"containerName":"_store_read","line":532},{"line":532,"containerName":"_store_read","kind":13,"name":"$readobj"},{"name":"id","kind":12,"containerName":"_store_read","line":532},{"line":533,"name":"$contigobj","containerName":"_store_read","kind":13},{"line":53
3,"name":"change_coord","kind":12,"containerName":"_store_read"},{"name":"$readobj","kind":13,"containerName":"_store_read","line":533},{"containerName":"_store_read","kind":12,"name":"id","line":533},{"line":534,"name":"$clipcoord","kind":13,"localvar":"my","containerName":"_store_read","definition":"my"},{"name":"new","kind":12,"containerName":"_store_read","line":534},{"line":535,"kind":13,"containerName":"_store_read","name":"$readobj"},{"containerName":"_store_read","kind":12,"name":"id","line":535},{"line":540,"name":"$clipcoord","containerName":"_store_read","kind":13},{"line":540,"containerName":"_store_read","kind":12,"name":"attach_seq"},{"line":540,"name":"$readobj","containerName":"_store_read","kind":13},{"name":"$contigobj","kind":13,"containerName":"_store_read","line":541},{"line":541,"name":"add_features","kind":12,"containerName":"_store_read"},{"kind":13,"containerName":"_store_read","name":"$clipcoord","line":541},{"name":"$readtags","kind":13,"localvar":"my","containerName":"_store_read","line":544,"definition":"my"},{"line":544,"name":"new","containerName":"_store_read","kind":12},{"name":"$readobj","kind":13,"containerName":"_store_read","line":545},{"name":"id","containerName":"_store_read","kind":12,"line":545},{"name":"$alncoord","kind":13,"containerName":"_store_read","line":552},{"line":552,"name":"add_sub_SeqFeature","kind":12,"containerName":"_store_read"},{"containerName":"_store_read","kind":13,"name":"$readtags","line":552},{"kind":13,"containerName":"_store_read","name":"$readobj","line":554}],"name":"_store_read","containerName":"main::","signature":{"parameters":[{"label":"$self"},{"label":"$readinfo"},{"label":"$contigobj"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 
NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIE FORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed by a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, 
Bio::Assembly::Contig","label":"_store_read($self,$readinfo,$contigobj)"},"line":501,"kind":12,"range":{"end":{"line":555,"character":9999},"start":{"line":501,"character":0}}},{"line":506,"kind":12,"name":"readinfo"},{"line":506,"name":"readinfo","kind":12},{"name":"readinfo","kind":12,"line":506},{"name":"Bio","kind":12,"containerName":"LocatableSeq","line":507},{"line":510,"name":"readinfo","kind":12},{"line":510,"kind":12,"name":"readinfo"},{"kind":12,"name":"readinfo","line":511},{"kind":12,"name":"readinfo","line":511},{"line":512,"name":"readinfo","kind":12},{"line":514,"name":"readinfo","kind":12},{"name":"readinfo","kind":12,"line":519},{"line":519,"name":"readinfo","kind":12},{"kind":12,"name":"readinfo","line":520},{"line":520,"name":"readinfo","kind":12},{"line":520,"kind":12,"name":"readinfo"},{"line":521,"name":"Bio","containerName":"SeqFeature::Generic","kind":12},{"kind":12,"name":"readinfo","line":523},{"line":524,"kind":12,"name":"readinfo"},{"line":525,"name":"readinfo","kind":12},{"name":"readinfo","kind":12,"line":532},{"line":532,"name":"readinfo","kind":12},{"name":"readinfo","kind":12,"line":533},{"line":533,"name":"readinfo","kind":12},{"line":534,"kind":12,"containerName":"SeqFeature::Generic","name":"Bio"},{"line":536,"kind":12,"name":"readinfo"},{"line":537,"name":"readinfo","kind":12},{"line":538,"kind":12,"name":"readinfo"},{"name":"Bio","kind":12,"containerName":"SeqFeature::Generic","line":544},{"line":546,"kind":12,"name":"readinfo"},{"name":"readinfo","kind":12,"line":547},{"name":"readinfo","kind":12,"line":548},{"kind":12,"name":"readinfo","line":549},{"line":550,"kind":12,"name":"readinfo"},{"signature":{"label":"_store_singlet($self,$readinfo,$contiginfo,$scaffoldobj)","documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - 
main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold","parameters":[{"label":"$self"},{"label":"$readinfo"},{"label":"$contiginfo"},{"label":"$scaffoldobj"}]},"line":568,"kind":12,"range":{"start":{"character":0,"line":568},"end":{"character":9999,"line":653}},"definition":"sub","detail":"($self,$readinfo,$contiginfo,$scaffoldobj)","children":[{"line":569,"name":"$self","localvar":"my","kind":13,"containerName":"_store_singlet","definition":"my"},{"kind":13,"containerName":"_store_singlet","name":"$readinfo","line":569},{"line":569,"kind":13,"containerName":"_store_singlet","name":"$contiginfo"},{"kind":13,"containerName":"_store_singlet","name":"$scaffoldobj","line":569},{"definition":"my","line":573,"name":"$contigid","localvar":"my","kind":13,"containerName":"_store_singlet"},{"containerName":"_store_singlet","localvar":"my","kind":13,"name":"$readid","line":574,"definition":"my"},{"line":574,"containerName":"_store_singlet","kind":13,"name":"$self"},{"line":574,"name":"_merge_seq_name_and_db","kind":12,"containerName":"_store_singlet"},{"line":578,"kind":13,"localvar":"my","containerName":"_store_singlet","name":"$seqobj","definition":"my"},{"line":578,"kind":12,"containerName":"_store_singlet","name":"new"},{"name":"$contigid","kind":13,"containerName":"_store_singlet","line":579},{"line":580,"kind":13,"containerName":"_store_singlet",
"name":"$readid"},{"line":585,"name":"$self","kind":13,"containerName":"_store_singlet"},{"name":"_qual_hex2dec","containerName":"_store_singlet","kind":12,"line":585},{"definition":"my","containerName":"_store_singlet","localvar":"my","kind":13,"name":"$singletobj","line":589},{"name":"new","kind":12,"containerName":"_store_singlet","line":589},{"name":"$seqobj","kind":13,"containerName":"_store_singlet","line":589},{"line":590,"kind":13,"containerName":"_store_singlet","name":"$scaffoldobj"},{"line":590,"name":"add_singlet","kind":12,"containerName":"_store_singlet"},{"line":590,"containerName":"_store_singlet","kind":13,"name":"$singletobj"},{"definition":"my","line":593,"name":"$contigtags","containerName":"_store_singlet","localvar":"my","kind":13},{"line":593,"containerName":"_store_singlet","kind":12,"name":"new"},{"line":596,"name":"$singletobj","kind":13,"containerName":"_store_singlet"},{"name":"get_consensus_length","kind":12,"containerName":"_store_singlet","line":596},{"name":"$singletobj","containerName":"_store_singlet","kind":13,"line":611},{"containerName":"_store_singlet","kind":12,"name":"add_features","line":611},{"name":"$contigtags","kind":13,"containerName":"_store_singlet","line":611},{"definition":"my","line":617,"name":"$alncoord","containerName":"_store_singlet","localvar":"my","kind":13},{"line":617,"containerName":"_store_singlet","kind":12,"name":"new"},{"name":"$contigid","kind":13,"containerName":"_store_singlet","line":622},{"line":624,"kind":13,"containerName":"_store_singlet","name":"$alncoord"},{"line":624,"containerName":"_store_singlet","kind":12,"name":"attach_seq"},{"containerName":"_store_singlet","kind":13,"name":"$singletobj","line":624},{"name":"seqref","kind":12,"containerName":"_store_singlet","line":624},{"name":"$singletobj","containerName":"_store_singlet","kind":13,"line":625},{"containerName":"_store_singlet","kind":12,"name":"add_features","line":625},{"line":625,"containerName":"_store_singlet","kind":13,"name":"$
alncoord"},{"line":631,"name":"$clipcoord","containerName":"_store_singlet","localvar":"my","kind":13,"definition":"my"},{"containerName":"_store_singlet","kind":12,"name":"new","line":631},{"kind":13,"containerName":"_store_singlet","name":"$contigid","line":636},{"name":"$clipcoord","kind":13,"containerName":"_store_singlet","line":638},{"line":638,"name":"attach_seq","containerName":"_store_singlet","kind":12},{"name":"$singletobj","kind":13,"containerName":"_store_singlet","line":638},{"line":638,"kind":12,"containerName":"_store_singlet","name":"seqref"},{"name":"$singletobj","containerName":"_store_singlet","kind":13,"line":639},{"name":"add_features","kind":12,"containerName":"_store_singlet","line":639},{"name":"$clipcoord","containerName":"_store_singlet","kind":13,"line":639},{"localvar":"my","kind":13,"containerName":"_store_singlet","name":"$readtags","line":642,"definition":"my"},{"kind":12,"containerName":"_store_singlet","name":"new","line":642},{"line":650,"kind":13,"containerName":"_store_singlet","name":"$alncoord"},{"containerName":"_store_singlet","kind":12,"name":"add_sub_SeqFeature","line":650},{"line":650,"kind":13,"containerName":"_store_singlet","name":"$readtags"},{"line":652,"containerName":"_store_singlet","kind":13,"name":"$singletobj"}],"name":"_store_singlet","containerName":"main::"},{"line":573,"name":"contiginfo","kind":12},{"name":"readinfo","kind":12,"line":574},{"name":"readinfo","kind":12,"line":574},{"kind":12,"containerName":"Seq::Quality","name":"Bio","line":578},{"kind":12,"name":"contiginfo","line":581},{"line":583,"kind":12,"name":"readinfo"},{"line":585,"kind":12,"name":"contiginfo"},{"containerName":"Assembly::Singlet","kind":12,"name":"Bio","line":589},{"kind":12,"containerName":"SeqFeature::Generic","name":"Bio","line":593},{"line":598,"kind":12,"name":"contiginfo"},{"line":599,"name":"contiginfo","kind":12},{"line":600,"name":"contiginfo","kind":12},{"line":601,"name":"contiginfo","kind":12},{"line":602,"name":"contig
info","kind":12},{"name":"contiginfo","kind":12,"line":603},{"kind":12,"name":"contiginfo","line":604},{"name":"contiginfo","kind":12,"line":605},{"kind":12,"name":"contiginfo","line":606},{"line":607,"name":"contiginfo","kind":12},{"name":"contiginfo","kind":12,"line":608},{"line":609,"kind":12,"name":"contiginfo"},{"line":614,"kind":12,"name":"readinfo"},{"kind":12,"name":"readinfo","line":614},{"line":615,"name":"readinfo","kind":12},{"kind":12,"name":"readinfo","line":615},{"line":615,"name":"readinfo","kind":12},{"name":"Bio","containerName":"SeqFeature::Generic","kind":12,"line":617},{"name":"readinfo","kind":12,"line":619},{"line":620,"kind":12,"name":"readinfo"},{"line":621,"name":"readinfo","kind":12},{"name":"readinfo","kind":12,"line":629},{"kind":12,"name":"readinfo","line":629},{"name":"readinfo","kind":12,"line":630},{"line":630,"name":"readinfo","kind":12},{"line":631,"name":"Bio","containerName":"SeqFeature::Generic","kind":12},{"kind":12,"name":"readinfo","line":633},{"line":634,"kind":12,"name":"readinfo"},{"kind":12,"name":"readinfo","line":635},{"kind":12,"containerName":"SeqFeature::Generic","name":"Bio","line":642},{"kind":12,"name":"readinfo","line":644},{"name":"readinfo","kind":12,"line":645},{"kind":12,"name":"readinfo","line":646},{"line":647,"name":"readinfo","kind":12},{"line":648,"kind":12,"name":"readinfo"},{"detail":"($self,@args)","definition":"sub","name":"write_assembly","containerName":"main::","children":[{"definition":"my","line":667,"name":"$self","localvar":"my","containerName":"write_assembly","kind":13},{"line":667,"name":"@args","kind":13,"containerName":"write_assembly"},{"definition":"my","name":"$scaffoldobj","kind":13,"localvar":"my","containerName":"write_assembly","line":668},{"name":"$singlets","kind":13,"containerName":"write_assembly","line":668},{"line":668,"name":"$self","kind":13,"containerName":"write_assembly"},{"line":668,"name":"_rearrange","kind":12,"containerName":"write_assembly"},{"name":"@args","contain
erName":"write_assembly","kind":13,"line":668},{"line":671,"containerName":"write_assembly","kind":13,"name":"$scaffoldobj"},{"kind":13,"containerName":"write_assembly","name":"$scaffoldobj","line":671},{"name":"isa","containerName":"write_assembly","kind":12,"line":671},{"kind":13,"containerName":"write_assembly","name":"$self","line":672},{"line":672,"kind":12,"containerName":"write_assembly","name":"warn"},{"line":678,"name":"@cont_ids","kind":13,"localvar":"my","containerName":"write_assembly","definition":"my"},{"line":678,"containerName":"write_assembly","kind":13,"name":"$scaffoldobj"},{"line":678,"containerName":"write_assembly","kind":12,"name":"get_contig_ids"},{"definition":"my","name":"@sing_ids","kind":13,"localvar":"my","containerName":"write_assembly","line":679},{"containerName":"write_assembly","kind":13,"name":"$scaffoldobj","line":679},{"line":679,"kind":12,"containerName":"write_assembly","name":"get_singlet_ids"},{"definition":"my","localvar":"my","kind":13,"containerName":"write_assembly","name":"%did","line":680},{"containerName":"write_assembly","localvar":"my","kind":13,"name":"$decimal_format","line":681,"definition":"my"},{"line":682,"containerName":"write_assembly","localvar":"my","kind":13,"name":"$i","definition":"my"},{"containerName":"write_assembly","kind":13,"name":"$i","line":682},{"line":682,"name":"@sing_ids","containerName":"write_assembly","kind":13},{"line":682,"name":"$i","kind":13,"containerName":"write_assembly"},{"name":"$display_id","kind":13,"localvar":"my","containerName":"write_assembly","line":684,"definition":"my"},{"line":684,"name":"$sing_ids","kind":13,"containerName":"write_assembly"},{"containerName":"write_assembly","kind":13,"name":"$i","line":684},{"line":686,"name":"$primary_id","kind":13,"localvar":"my","containerName":"write_assembly","definition":"my"},{"containerName":"write_assembly","kind":13,"name":"$scaffoldobj","line":686},{"kind":12,"containerName":"write_assembly","name":"get_singlet_by_id","line"
:686},{"line":686,"name":"$display_id","containerName":"write_assembly","kind":13},{"line":686,"containerName":"write_assembly","kind":12,"name":"seqref"},{"containerName":"write_assembly","kind":12,"name":"primary_id","line":686},{"line":687,"kind":13,"containerName":"write_assembly","name":"$sing_ids"},{"name":"$i","containerName":"write_assembly","kind":13,"line":687},{"containerName":"write_assembly","kind":13,"name":"$primary_id","line":687},{"name":"$did","containerName":"write_assembly","kind":13,"line":688},{"containerName":"write_assembly","kind":13,"name":"$primary_id","line":688},{"line":688,"containerName":"write_assembly","kind":13,"name":"$display_id"},{"line":690,"containerName":"write_assembly","localvar":"my","kind":13,"name":"@ids","definition":"my"},{"name":"@cont_ids","containerName":"write_assembly","kind":13,"line":690},{"line":690,"kind":13,"containerName":"write_assembly","name":"@sing_ids"},{"line":691,"name":"@ids","containerName":"write_assembly","kind":13},{"line":691,"name":"$a","containerName":"write_assembly","kind":13},{"name":"$b","kind":13,"containerName":"write_assembly","line":691},{"name":"@ids","containerName":"write_assembly","kind":13,"line":691},{"definition":"my","name":"$numobj","kind":13,"localvar":"my","containerName":"write_assembly","line":692},{"name":"@ids","containerName":"write_assembly","kind":13,"line":692},{"definition":"my","name":"$i","localvar":"my","containerName":"write_assembly","kind":13,"line":695},{"name":"$i","kind":13,"containerName":"write_assembly","line":695},{"line":695,"containerName":"write_assembly","kind":13,"name":"$numobj"},{"containerName":"write_assembly","kind":13,"name":"$i","line":695},{"line":697,"containerName":"write_assembly","localvar":"my","kind":13,"name":"$objid","definition":"my"},{"line":697,"kind":13,"containerName":"write_assembly","name":"$ids"},{"line":697,"kind":13,"containerName":"write_assembly","name":"$i"},{"kind":13,"containerName":"write_assembly","name":"$did","line
":699},{"name":"$objid","kind":13,"containerName":"write_assembly","line":699},{"line":701,"name":"$singlets","kind":13,"containerName":"write_assembly"},{"definition":"my","line":703,"name":"$contigid","localvar":"my","kind":13,"containerName":"write_assembly"},{"name":"$objid","containerName":"write_assembly","kind":13,"line":703},{"name":"$readid","localvar":"my","kind":13,"containerName":"write_assembly","line":704,"definition":"my"},{"name":"$did","kind":13,"containerName":"write_assembly","line":704},{"kind":13,"containerName":"write_assembly","name":"$objid","line":704},{"kind":13,"localvar":"my","containerName":"write_assembly","name":"$singletobj","line":705,"definition":"my"},{"line":705,"name":"$scaffoldobj","containerName":"write_assembly","kind":13},{"line":705,"containerName":"write_assembly","kind":12,"name":"get_singlet_by_id"},{"name":"$readid","kind":13,"containerName":"write_assembly","line":705},{"definition":"my","line":708,"localvar":"my","kind":13,"containerName":"write_assembly","name":"$contanno"},{"line":709,"kind":12,"containerName":"write_assembly","name":"primary_tag"},{"kind":13,"containerName":"write_assembly","name":"$singletobj","line":710},{"line":710,"name":"get_features_collection","containerName":"write_assembly","kind":12},{"line":711,"name":"get_all_features","containerName":"write_assembly","kind":12},{"localvar":"my","kind":13,"containerName":"write_assembly","name":"%contiginfo","line":712,"definition":"my"},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":713},{"name":"$singletobj","containerName":"write_assembly","kind":13,"line":713},{"line":713,"containerName":"write_assembly","kind":12,"name":"seqref"},{"line":713,"name":"seq","kind":12,"containerName":"write_assembly"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":714},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":714},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":715},
{"name":"$self","containerName":"write_assembly","kind":13,"line":715},{"line":715,"kind":12,"containerName":"write_assembly","name":"_qual_dec2hex"},{"line":716,"name":"$singletobj","kind":13,"containerName":"write_assembly"},{"line":716,"name":"seqref","containerName":"write_assembly","kind":12},{"name":"qual","kind":12,"containerName":"write_assembly","line":716},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":717},{"name":"$contigid","containerName":"write_assembly","kind":13,"line":717},{"line":718,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":718},{"containerName":"write_assembly","kind":12,"name":"get_tag_values","line":718},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":719},{"containerName":"write_assembly","kind":13,"name":"$contanno","line":719},{"line":719,"name":"get_tag_values","containerName":"write_assembly","kind":12},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":720},{"line":720,"containerName":"write_assembly","kind":13,"name":"$contanno"},{"line":720,"kind":12,"containerName":"write_assembly","name":"get_tag_values"},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":721},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":721},{"name":"get_tag_values","containerName":"write_assembly","kind":12,"line":721},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":722},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":722},{"line":722,"containerName":"write_assembly","kind":12,"name":"get_tag_values"},{"line":723,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"kind":13,"containerName":"write_assembly","name":"$decimal_format","line":723},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":724},{"name":"$decimal_format","containerName":"write_assembly","kind":13,"line":725},{"l
ine":725,"containerName":"write_assembly","kind":13,"name":"$self"},{"line":725,"kind":12,"containerName":"write_assembly","name":"_perc_N"},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":725},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":726},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":727},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":727},{"name":"get_tag_values","kind":12,"containerName":"write_assembly","line":727},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":728},{"line":728,"kind":13,"containerName":"write_assembly","name":"$contanno"},{"line":728,"name":"get_tag_values","kind":12,"containerName":"write_assembly"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":729},{"line":729,"kind":13,"containerName":"write_assembly","name":"$contanno"},{"line":729,"containerName":"write_assembly","kind":12,"name":"get_tag_values"},{"line":730,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"kind":13,"containerName":"write_assembly","name":"$contanno","line":730},{"line":730,"name":"get_tag_values","containerName":"write_assembly","kind":12},{"line":731,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"line":731,"name":"$self","kind":13,"containerName":"write_assembly"},{"name":"_date_time","kind":12,"containerName":"write_assembly","line":731},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":732},{"containerName":"write_assembly","kind":13,"name":"$contanno","line":732},{"line":732,"containerName":"write_assembly","kind":12,"name":"get_tag_values"},{"line":733,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":733},{"line":733,"name":"get_tag_values","containerName":"write_assembly","kind":12},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":736},{"kind":13,"
containerName":"write_assembly","name":"$contiginfo","line":736},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":737},{"line":737,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"line":738,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":738},{"line":739,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":739,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"line":740,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":740},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":741},{"line":741,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"line":742,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":742},{"line":743,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":743},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":744},{"line":744,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":745,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":745},{"line":746,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":746},{"line":749,"containerName":"write_assembly","kind":13,"name":"$self"},{"line":749,"name":"_print","containerName":"write_assembly","kind":12},{"definition":"my","line":773,"localvar":"my","containerName":"write_assembly","kind":13,"name":"$seq_name"},{"line":773,"name":"$db","containerName":"write_assembly","kind":13},{"name":"$self","containerName":"write_assembly","kind":13,"line":773},{"l
ine":773,"name":"_split_seq_name_and_db","kind":12,"containerName":"write_assembly"},{"line":773,"containerName":"write_assembly","kind":13,"name":"$readid"},{"definition":"my","line":774,"localvar":"my","kind":13,"containerName":"write_assembly","name":"$clipcoord"},{"line":775,"containerName":"write_assembly","kind":12,"name":"primary_tag"},{"line":776,"kind":13,"containerName":"write_assembly","name":"$singletobj"},{"line":776,"containerName":"write_assembly","kind":12,"name":"get_features_collection"},{"line":777,"name":"get_all_features","kind":12,"containerName":"write_assembly"},{"line":778,"name":"$alncoord","localvar":"my","kind":13,"containerName":"write_assembly","definition":"my"},{"line":779,"containerName":"write_assembly","kind":12,"name":"primary_tag"},{"name":"$singletobj","containerName":"write_assembly","kind":13,"line":780},{"name":"get_features_collection","kind":12,"containerName":"write_assembly","line":780},{"line":781,"containerName":"write_assembly","kind":12,"name":"get_all_features"},{"line":782,"name":"$readanno","localvar":"my","containerName":"write_assembly","kind":13,"definition":"my"},{"line":783,"kind":12,"containerName":"write_assembly","name":"primary_tag"},{"kind":13,"containerName":"write_assembly","name":"$singletobj","line":784},{"kind":12,"containerName":"write_assembly","name":"get_seq_coord","line":784},{"name":"$singletobj","kind":13,"containerName":"write_assembly","line":784},{"kind":12,"containerName":"write_assembly","name":"seqref","line":784},{"name":"get_SeqFeatures","kind":12,"containerName":"write_assembly","line":785},{"line":786,"localvar":"my","kind":13,"containerName":"write_assembly","name":"%readinfo","definition":"my"},{"line":787,"name":"$readinfo","containerName":"write_assembly","kind":13},{"line":787,"name":"$seq_name","kind":13,"containerName":"write_assembly"},{"line":788,"containerName":"write_assembly","kind":13,"name":"$readinfo"},{"name":"$alncoord","kind":13,"containerName":"write_assembly","lin
e":788},{"line":788,"kind":12,"containerName":"write_assembly","name":"location"},{"name":"start","kind":12,"containerName":"write_assembly","line":788},{"name":"$readinfo","kind":13,"containerName":"write_assembly","line":789},{"line":789,"name":"$alncoord","containerName":"write_assembly","kind":13},{"name":"location","containerName":"write_assembly","kind":12,"line":789},{"line":789,"name":"end","kind":12,"containerName":"write_assembly"},{"line":790,"kind":13,"containerName":"write_assembly","name":"$readinfo"},{"line":790,"containerName":"write_assembly","kind":13,"name":"$clipcoord"},{"kind":12,"containerName":"write_assembly","name":"location","line":790},{"kind":12,"containerName":"write_assembly","name":"start","line":790},{"name":"$readinfo","containerName":"write_assembly","kind":13,"line":791},{"name":"$clipcoord","containerName":"write_assembly","kind":13,"line":791},{"line":791,"name":"location","kind":12,"containerName":"write_assembly"},{"containerName":"write_assembly","kind":12,"name":"end","line":791},{"name":"$readinfo","kind":13,"containerName":"write_assembly","line":792},{"containerName":"write_assembly","kind":13,"name":"$readanno","line":792},{"name":"get_tag_values","containerName":"write_assembly","kind":12,"line":792},{"name":"$readinfo","kind":13,"containerName":"write_assembly","line":793},{"line":793,"name":"$readanno","kind":13,"containerName":"write_assembly"},{"kind":12,"containerName":"write_assembly","name":"get_tag_values","line":793},{"name":"$readinfo","kind":13,"containerName":"write_assembly","line":794},{"name":"$db","containerName":"write_assembly","kind":13,"line":794},{"line":795,"name":"$readinfo","kind":13,"containerName":"write_assembly"},{"name":"$readinfo","kind":13,"containerName":"write_assembly","line":797},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":797},{"line":800,"name":"$readinfo","kind":13,"containerName":"write_assembly"},{"name":"$readinfo","containerName":"write_assembly","kind
":13,"line":800},{"line":801,"containerName":"write_assembly","kind":13,"name":"$readinfo"},{"containerName":"write_assembly","kind":13,"name":"$readinfo","line":801},{"name":"$self","kind":13,"containerName":"write_assembly","line":804},{"line":804,"containerName":"write_assembly","kind":12,"name":"_print"},{"line":816,"name":"$i","containerName":"write_assembly","kind":13},{"containerName":"write_assembly","kind":13,"name":"$numobj","line":816},{"line":817,"name":"$self","containerName":"write_assembly","kind":13},{"containerName":"write_assembly","kind":12,"name":"_print","line":817},{"definition":"my","line":821,"name":"$contigid","containerName":"write_assembly","localvar":"my","kind":13},{"line":821,"name":"$objid","containerName":"write_assembly","kind":13},{"line":822,"localvar":"my","kind":13,"containerName":"write_assembly","name":"$contigobj","definition":"my"},{"line":822,"name":"$scaffoldobj","containerName":"write_assembly","kind":13},{"line":822,"name":"get_contig_by_id","kind":12,"containerName":"write_assembly"},{"line":822,"name":"$contigid","containerName":"write_assembly","kind":13},{"kind":13,"containerName":"write_assembly","name":"$contigobj","line":825},{"line":825,"name":"num_sequences","kind":12,"containerName":"write_assembly"},{"name":"$singlets","containerName":"write_assembly","kind":13,"line":825},{"definition":"my","line":828,"localvar":"my","containerName":"write_assembly","kind":13,"name":"$contanno"},{"name":"primary_tag","kind":12,"containerName":"write_assembly","line":829},{"line":830,"containerName":"write_assembly","kind":13,"name":"$contigobj"},{"name":"get_features_collection","containerName":"write_assembly","kind":12,"line":830},{"name":"get_all_features","containerName":"write_assembly","kind":12,"line":831},{"kind":13,"localvar":"my","containerName":"write_assembly","name":"%contiginfo","line":832,"definition":"my"},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":833},{"name":"$self","kind":13,"co
ntainerName":"write_assembly","line":833},{"line":833,"name":"_ungap","kind":12,"containerName":"write_assembly"},{"name":"$contigobj","kind":13,"containerName":"write_assembly","line":834},{"line":834,"kind":12,"containerName":"write_assembly","name":"get_consensus_sequence"},{"name":"seq","containerName":"write_assembly","kind":12,"line":834},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":835},{"line":835,"name":"$contigobj","kind":13,"containerName":"write_assembly"},{"line":835,"name":"get_consensus_sequence","containerName":"write_assembly","kind":12},{"line":835,"name":"seq","kind":12,"containerName":"write_assembly"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":836},{"line":836,"name":"$self","kind":13,"containerName":"write_assembly"},{"kind":12,"containerName":"write_assembly","name":"_qual_dec2hex","line":836},{"name":"$contigobj","containerName":"write_assembly","kind":13,"line":837},{"name":"get_consensus_quality","containerName":"write_assembly","kind":12,"line":837},{"kind":12,"containerName":"write_assembly","name":"qual","line":837},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":838},{"line":838,"containerName":"write_assembly","kind":13,"name":"$contigid"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":839},{"line":839,"name":"$contanno","kind":13,"containerName":"write_assembly"},{"line":839,"containerName":"write_assembly","kind":12,"name":"get_tag_values"},{"line":840,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"kind":13,"containerName":"write_assembly","name":"$contanno","line":840},{"containerName":"write_assembly","kind":12,"name":"get_tag_values","line":840},{"line":841,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"line":841,"containerName":"write_assembly","kind":13,"name":"$contanno"},{"line":841,"containerName":"write_assembly","kind":12,"name":"get_tag_values"},{"kind":13,"containerName":"write_a
ssembly","name":"$contiginfo","line":842},{"kind":13,"containerName":"write_assembly","name":"$contanno","line":842},{"line":842,"name":"get_tag_values","kind":12,"containerName":"write_assembly"},{"line":843,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"containerName":"write_assembly","kind":13,"name":"$contanno","line":843},{"kind":12,"containerName":"write_assembly","name":"get_tag_values","line":843},{"line":844,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"name":"$decimal_format","containerName":"write_assembly","kind":13,"line":845},{"kind":13,"containerName":"write_assembly","name":"$self","line":845},{"containerName":"write_assembly","kind":12,"name":"_redundancy","line":845},{"line":845,"containerName":"write_assembly","kind":13,"name":"$contigobj"},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":846},{"kind":13,"containerName":"write_assembly","name":"$decimal_format","line":847},{"name":"$self","containerName":"write_assembly","kind":13,"line":847},{"line":847,"containerName":"write_assembly","kind":12,"name":"_perc_N"},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":847},{"line":848,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"containerName":"write_assembly","kind":13,"name":"$contigobj","line":848},{"containerName":"write_assembly","kind":12,"name":"num_sequences","line":848},{"line":849,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"line":849,"containerName":"write_assembly","kind":13,"name":"$contanno"},{"line":849,"name":"get_tag_values","kind":12,"containerName":"write_assembly"},{"line":850,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"containerName":"write_assembly","kind":13,"name":"$contanno","line":850},{"line":850,"kind":12,"containerName":"write_assembly","name":"get_tag_values"},{"line":851,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"line":851,"kind":13,"containerName"
:"write_assembly","name":"$contanno"},{"line":851,"name":"get_tag_values","kind":12,"containerName":"write_assembly"},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":852},{"line":852,"name":"$contanno","containerName":"write_assembly","kind":13},{"line":852,"name":"get_tag_values","containerName":"write_assembly","kind":12},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":853},{"name":"$self","containerName":"write_assembly","kind":13,"line":853},{"containerName":"write_assembly","kind":12,"name":"_date_time","line":853},{"line":854,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"name":"$contanno","kind":13,"containerName":"write_assembly","line":854},{"kind":12,"containerName":"write_assembly","name":"get_tag_values","line":854},{"line":855,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":855,"kind":13,"containerName":"write_assembly","name":"$contanno"},{"kind":12,"containerName":"write_assembly","name":"get_tag_values","line":855},{"line":858,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":858,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":859,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":859},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":860},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":860},{"line":861,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"line":861,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":862},{"line":862,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":863},{"line":863,"containerName":"write_assembly","kind":13,"name":"$contiginfo"},{"kind":13,"containerName":"write_asse
mbly","name":"$contiginfo","line":864},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":864},{"containerName":"write_assembly","kind":13,"name":"$contiginfo","line":865},{"kind":13,"containerName":"write_assembly","name":"$contiginfo","line":865},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":866},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":866},{"name":"$contiginfo","containerName":"write_assembly","kind":13,"line":867},{"name":"$contiginfo","kind":13,"containerName":"write_assembly","line":867},{"line":868,"name":"$contiginfo","containerName":"write_assembly","kind":13},{"line":868,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"line":871,"name":"$self","containerName":"write_assembly","kind":13},{"line":871,"containerName":"write_assembly","kind":12,"name":"_print"},{"definition":"my","kind":13,"localvar":"my","containerName":"write_assembly","name":"$seqno","line":893},{"kind":13,"localvar":"my","containerName":"write_assembly","name":"$readobj","line":894,"definition":"my"},{"containerName":"write_assembly","kind":13,"name":"$contigobj","line":894},{"line":894,"kind":12,"containerName":"write_assembly","name":"each_seq"},{"line":895,"name":"$seqno","kind":13,"containerName":"write_assembly"},{"line":898,"name":"$seq_name","containerName":"write_assembly","localvar":"my","kind":13,"definition":"my"},{"containerName":"write_assembly","kind":13,"name":"$db","line":898},{"line":898,"name":"$self","containerName":"write_assembly","kind":13},{"name":"_split_seq_name_and_db","containerName":"write_assembly","kind":12,"line":898},{"line":898,"kind":13,"containerName":"write_assembly","name":"$readobj"},{"name":"id","containerName":"write_assembly","kind":12,"line":898},{"definition":"my","line":899,"localvar":"my","containerName":"write_assembly","kind":13,"name":"$asm_lend"},{"name":"$asm_rend","containerName":"write_assembly","kind":13,"line":899},{"name":"$seq_lend","conta
inerName":"write_assembly","kind":13,"line":899},{"name":"$seq_rend","containerName":"write_assembly","kind":13,"line":899},{"line":899,"kind":13,"containerName":"write_assembly","name":"$offset"},{"line":900,"name":"$self","containerName":"write_assembly","kind":13},{"kind":12,"containerName":"write_assembly","name":"_coord","line":900},{"line":900,"name":"$readobj","containerName":"write_assembly","kind":13},{"name":"$contigobj","containerName":"write_assembly","kind":13,"line":900},{"localvar":"my","kind":13,"containerName":"write_assembly","name":"$readanno","line":901,"definition":"my"},{"line":902,"kind":12,"containerName":"write_assembly","name":"primary_tag"},{"name":"$readobj","kind":13,"containerName":"write_assembly","line":902},{"line":902,"containerName":"write_assembly","kind":12,"name":"primary_id"},{"line":903,"containerName":"write_assembly","kind":13,"name":"$contigobj"},{"containerName":"write_assembly","kind":12,"name":"get_seq_coord","line":903},{"name":"$readobj","kind":13,"containerName":"write_assembly","line":903},{"name":"get_SeqFeatures","containerName":"write_assembly","kind":12,"line":904},{"localvar":"my","containerName":"write_assembly","kind":13,"name":"%readinfo","line":905,"definition":"my"},{"kind":13,"containerName":"write_assembly","name":"$readinfo","line":906},{"line":906,"kind":13,"containerName":"write_assembly","name":"$seq_name"},{"line":907,"kind":13,"containerName":"write_assembly","name":"$readinfo"},{"kind":13,"containerName":"write_assembly","name":"$asm_lend","line":907},{"line":908,"name":"$readinfo","kind":13,"containerName":"write_assembly"},{"line":908,"kind":13,"containerName":"write_assembly","name":"$asm_rend"},{"line":909,"kind":13,"containerName":"write_assembly","name":"$readinfo"},{"name":"$seq_lend","containerName":"write_assembly","kind":13,"line":909},{"name":"$readinfo","containerName":"write_assembly","kind":13,"line":910},{"name":"$seq_rend","kind":13,"containerName":"write_assembly","line":910},{"nam
e":"$readinfo","containerName":"write_assembly","kind":13,"line":911},{"name":"$readanno","kind":13,"containerName":"write_assembly","line":911},{"line":911,"kind":12,"containerName":"write_assembly","name":"get_tag_values"},{"line":912,"kind":13,"containerName":"write_assembly","name":"$readinfo"},{"kind":13,"containerName":"write_assembly","name":"$readanno","line":912},{"line":912,"name":"get_tag_values","containerName":"write_assembly","kind":12},{"line":913,"name":"$readinfo","containerName":"write_assembly","kind":13},{"line":913,"kind":13,"containerName":"write_assembly","name":"$db"},{"kind":13,"containerName":"write_assembly","name":"$readinfo","line":914},{"containerName":"write_assembly","kind":13,"name":"$offset","line":914},{"kind":13,"containerName":"write_assembly","name":"$readinfo","line":915},{"line":915,"kind":13,"containerName":"write_assembly","name":"$readobj"},{"name":"seq","containerName":"write_assembly","kind":12,"line":915},{"containerName":"write_assembly","kind":13,"name":"$readinfo","line":918},{"line":918,"kind":13,"containerName":"write_assembly","name":"$readinfo"},{"line":919,"containerName":"write_assembly","kind":13,"name":"$readinfo"},{"line":919,"name":"$readinfo","kind":13,"containerName":"write_assembly"},{"line":922,"name":"$self","containerName":"write_assembly","kind":13},{"line":922,"containerName":"write_assembly","kind":12,"name":"_print"},{"kind":13,"containerName":"write_assembly","name":"$seqno","line":934},{"line":934,"kind":13,"containerName":"write_assembly","name":"$contiginfo"},{"kind":13,"containerName":"write_assembly","name":"$self","line":935},{"name":"_print","kind":12,"containerName":"write_assembly","line":935},{"line":936,"containerName":"write_assembly","kind":13,"name":"$seqno"},{"line":936,"name":"$contiginfo","kind":13,"containerName":"write_assembly"},{"name":"$i","kind":13,"containerName":"write_assembly","line":936},{"name":"$numobj","containerName":"write_assembly","kind":13,"line":936},{"kind":13
,"containerName":"write_assembly","name":"$self","line":937},{"containerName":"write_assembly","kind":12,"name":"_print","line":937}],"signature":{"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed by a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists. Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nresponsive experts will be able to look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nof the bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold 
object","parameters":[{"label":"$self"},{"label":"@args"}],"label":"write_assembly($self,@args)"},"range":{"end":{"character":9999,"line":940},"start":{"line":666,"character":0}},"kind":12,"line":666},{"detail":"($self,$seq_string)","definition":"sub","name":"_perc_N","containerName":"main::","children":[{"name":"$self","localvar":"my","kind":13,"containerName":"_perc_N","line":960,"definition":"my"},{"name":"$seq_string","containerName":"_perc_N","kind":13,"line":960},{"line":961,"name":"$self","kind":13,"containerName":"_perc_N"},{"name":"throw","containerName":"_perc_N","kind":12,"line":961},{"name":"$seq_string","kind":13,"containerName":"_perc_N","line":961},{"definition":"my","name":"$perc_N","localvar":"my","containerName":"_perc_N","kind":13,"line":962},{"definition":"my","localvar":"my","kind":13,"containerName":"_perc_N","name":"$base","line":963},{"kind":13,"containerName":"_perc_N","name":"$seq_string","line":963},{"line":965,"kind":13,"containerName":"_perc_N","name":"$base"},{"line":965,"name":"$base","containerName":"_perc_N","kind":13},{"containerName":"_perc_N","kind":13,"name":"$perc_N","line":966},{"name":"$perc_N","containerName":"_perc_N","kind":13,"line":969},{"line":969,"name":"$perc_N","kind":13,"containerName":"_perc_N"},{"line":969,"containerName":"_perc_N","kind":13,"name":"$seq_string"},{"containerName":"_perc_N","kind":13,"name":"$perc_N","line":970}],"signature":{"parameters":[{"label":"$self"},{"label":"$seq_string"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading 
methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string","label":"_perc_N($self,$seq_string)"},"kind":12,"range":{"end":{"character":9999,"line":971},"start":{"character":0,"line":959}},"line":959},{"name":"_redundancy","containerName":"main::","children":[{"kind":13,"localvar":"my","containerName":"_redundancy","name":"$self","line":988,"definition":"my"},{"line":988,"name":"$contigobj","kind":13,"containerName":"_redundancy"},{"name":"$redundancy","containerName":"_redundancy","localvar":"my","kind":13,"line":989,"definition":"my"},{"definition":"my","line":992,"localvar":"my","kind":13,"containerName":"_redundancy","name":"$read_tot"},{"definition":"my","name":"$readobj","localvar":"my","containerName":"_redundancy","kind":13,"line":993},{"line":993,"kind":13,"containerName":"_redundancy","name":"$contigobj"},{"line":993,"name":"each_seq","kind":12,"containerName":"_redundancy"},{"definition":"my","line":994,"name":"$read_length","localvar":"my","kind":13,"containerName":"_redundancy"},{"name":"$readobj","kind":13,"containerName":"_redundancy","line":994},{"kind":12,"containerName":"_redundancy","name":"seq","line":994},{"line":995,"containerName":"_redundancy","kind":13,"name":"$read_tot"},{"line":995,"name":"$read_length","containerName":"_redundancy","kind":13},{"kind":13,"containerName":"_redundancy","name"
:"$redundancy","line":997},{"line":997,"name":"$read_tot","kind":13,"containerName":"_redundancy"},{"definition":"my","name":"$consensus_sequence","containerName":"_redundancy","localvar":"my","kind":13,"line":1000},{"name":"$contigobj","containerName":"_redundancy","kind":13,"line":1000},{"line":1000,"kind":12,"containerName":"_redundancy","name":"get_consensus_sequence"},{"line":1000,"kind":12,"containerName":"_redundancy","name":"seq"},{"definition":"my","name":"@consensus_gaps","localvar":"my","kind":13,"containerName":"_redundancy","line":1001},{"containerName":"_redundancy","kind":13,"name":"$contigobj","line":1002},{"name":"_register_gaps","containerName":"_redundancy","kind":12,"line":1002},{"kind":13,"containerName":"_redundancy","name":"$consensus_sequence","line":1002},{"line":1002,"kind":13,"containerName":"_redundancy","name":"@consensus_gaps"},{"localvar":"my","kind":13,"containerName":"_redundancy","name":"$respected_gaps","line":1003,"definition":"my"},{"name":"@consensus_gaps","kind":13,"containerName":"_redundancy","line":1003},{"containerName":"_redundancy","kind":13,"name":"$respected_gaps","line":1004},{"definition":"my","containerName":"_redundancy","localvar":"my","kind":13,"name":"@cons_arr","line":1005},{"line":1005,"name":"$consensus_sequence","containerName":"_redundancy","kind":13},{"definition":"my","kind":13,"localvar":"my","containerName":"_redundancy","name":"$gap_pos_cons","line":1006},{"kind":13,"containerName":"_redundancy","name":"@consensus_gaps","line":1006},{"line":1007,"name":"$readobj","containerName":"_redundancy","localvar":"my","kind":13,"definition":"my"},{"name":"$contigobj","kind":13,"containerName":"_redundancy","line":1007},{"kind":12,"containerName":"_redundancy","name":"each_seq","line":1007},{"name":"$readid","localvar":"my","containerName":"_redundancy","kind":13,"line":1008,"definition":"my"},{"line":1008,"kind":13,"containerName":"_redundancy","name":"$readobj"},{"name":"id","containerName":"_redundancy","kind":
12,"line":1008},{"definition":"my","name":"$read_start","localvar":"my","containerName":"_redundancy","kind":13,"line":1009},{"line":1009,"name":"$contigobj","containerName":"_redundancy","kind":13},{"line":1009,"containerName":"_redundancy","kind":12,"name":"change_coord"},{"line":1010,"kind":13,"containerName":"_redundancy","name":"$readobj"},{"name":"start","containerName":"_redundancy","kind":12,"line":1010},{"definition":"my","name":"$read_end","kind":13,"localvar":"my","containerName":"_redundancy","line":1011},{"line":1011,"name":"$contigobj","containerName":"_redundancy","kind":13},{"name":"change_coord","containerName":"_redundancy","kind":12,"line":1011},{"kind":13,"containerName":"_redundancy","name":"$readobj","line":1012},{"name":"end","kind":12,"containerName":"_redundancy","line":1012},{"line":1014,"kind":13,"containerName":"_redundancy","name":"$gap_pos_cons"},{"line":1014,"name":"$read_start","containerName":"_redundancy","kind":13},{"line":1015,"kind":13,"containerName":"_redundancy","name":"$gap_pos_cons"},{"line":1015,"name":"$read_end","containerName":"_redundancy","kind":13},{"name":"@read_arr","containerName":"_redundancy","localvar":"my","kind":13,"line":1017,"definition":"my"},{"name":"$readobj","containerName":"_redundancy","kind":13,"line":1017},{"containerName":"_redundancy","kind":12,"name":"seq","line":1017},{"line":1018,"kind":13,"localvar":"my","containerName":"_redundancy","name":"$gap_pos_read","definition":"my"},{"containerName":"_redundancy","kind":13,"name":"$contigobj","line":1018},{"name":"change_coord","containerName":"_redundancy","kind":12,"line":1018},{"line":1019,"name":"$gap_pos_cons","containerName":"_redundancy","kind":13},{"line":1020,"kind":13,"containerName":"_redundancy","name":"$read_arr"},{"kind":13,"containerName":"_redundancy","name":"$gap_pos_read","line":1020},{"name":"$cons_arr","kind":13,"containerName":"_redundancy","line":1020},{"containerName":"_redundancy","kind":13,"name":"$gap_pos_cons","line":1020},{"
line":1021,"kind":13,"containerName":"_redundancy","name":"$respected_gaps"},{"line":1026,"name":"$redundancy","kind":13,"containerName":"_redundancy"},{"line":1026,"name":"$respected_gaps","containerName":"_redundancy","kind":13},{"line":1029,"name":"$contig_length","containerName":"_redundancy","localvar":"my","kind":13,"definition":"my"},{"line":1029,"kind":13,"containerName":"_redundancy","name":"$self"},{"name":"_ungap","containerName":"_redundancy","kind":12,"line":1029},{"kind":13,"containerName":"_redundancy","name":"$contigobj","line":1029},{"containerName":"_redundancy","kind":12,"name":"get_consensus_sequence","line":1029},{"name":"seq","kind":12,"containerName":"_redundancy","line":1029},{"line":1030,"name":"$redundancy","kind":13,"containerName":"_redundancy"},{"name":"$contig_length","kind":13,"containerName":"_redundancy","line":1030},{"line":1032,"name":"$redundancy","containerName":"_redundancy","kind":13}],"detail":"($self,$contigobj)","definition":"sub","range":{"start":{"character":0,"line":984},"end":{"line":1033,"character":9999}},"kind":12,"line":984,"signature":{"parameters":[{"label":"$self"},{"label":"$contigobj"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => 
\">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by 
more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : 
Bio::Assembly::Contig","label":"_redundancy($self,$contigobj)"}},{"definition":"sub","detail":"($self,$seq_string)","children":[{"line":1046,"kind":13,"localvar":"my","containerName":"_ungap","name":"$self","definition":"my"},{"line":1046,"kind":13,"containerName":"_ungap","name":"$seq_string"},{"line":1047,"kind":13,"containerName":"_ungap","name":"$seq_string"},{"line":1048,"name":"$seq_string","containerName":"_ungap","kind":13}],"name":"_ungap","containerName":"main::","signature":{"parameters":[{"label":"$self"},{"label":"$seq_string"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. 
This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n   
 quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists. Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nresponsive experts will be able to look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track of\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : Bio::Assembly::Contig\n\n\nsub _redundancy {\n    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped\n    # consensus + number of gaps in aligned reads that are also in the consensus ) )\n    # / length of ungapped consensus\n    my ($self, $contigobj) = @_;\n    my $redundancy = 0;\n    \n    # sum of all aligned read lengths\n    my $read_tot = 0;\n    for my $readobj ( $contigobj->each_seq ) {\n        my $read_length = length($readobj->seq);\n        $read_tot += $read_length;\n    
}\n    $redundancy += $read_tot;\n    \n    # - respected gaps\n    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;\n    my @consensus_gaps = ();\n    $contigobj->_register_gaps($consensus_sequence, \\@consensus_gaps);\n    my $respected_gaps = scalar(@consensus_gaps);\n    if ($respected_gaps > 0) {\n        my @cons_arr = split //, $consensus_sequence;\n        for my $gap_pos_cons ( @consensus_gaps ) {\n            for my $readobj ( $contigobj->each_seq ) {\n                my $readid = $readobj->id;\n                my $read_start = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->start);\n                my $read_end   = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->end  );\n                # skip this if consensus gap position not within in the read boundaries\n                next if ( ($gap_pos_cons < $read_start)\n                    || ($gap_pos_cons > $read_end) );\n                # does the read position have read have a gap?\n                my @read_arr = split //, $readobj->seq;                \n                my $gap_pos_read = $contigobj->change_coord(\n                    'gapped consensus', \"aligned $readid\", $gap_pos_cons);\n                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {\n                    $respected_gaps++;\n                }\n            }\n        }\n    }\n    $redundancy -= $respected_gaps;\n    \n    # / length of ungapped consensus\n    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));\n    $redundancy /= $contig_length;\n    \n    return $redundancy;\n}\n\n=head2 _ungap\n\n    Title   : _ungap\n    Usage   : my $ungapped = $ass_io->_ungap($gapped)\n    Function: Remove the gaps from a sequence. 
Gaps are - in TIGR Assembler\n    Returns : string\n    Args    : string","label":"_ungap($self,$seq_string)"},"line":1045,"kind":12,"range":{"start":{"character":0,"line":1045},"end":{"line":1049,"character":9999}}},{"name":"_date_time","containerName":"main::","children":[{"name":"$self","localvar":"my","kind":13,"containerName":"_date_time","line":1062,"definition":"my"},{"name":"$sec","containerName":"_date_time","localvar":"my","kind":13,"line":1063,"definition":"my"},{"line":1063,"name":"$min","containerName":"_date_time","kind":13},{"containerName":"_date_time","kind":13,"name":"$hour","line":1063},{"containerName":"_date_time","kind":13,"name":"$mday","line":1063},{"line":1063,"containerName":"_date_time","kind":13,"name":"$mon"},{"line":1063,"containerName":"_date_time","kind":13,"name":"$year"},{"name":"$wday","kind":13,"containerName":"_date_time","line":1063},{"line":1063,"name":"$yday","containerName":"_date_time","kind":13},{"name":"$isdst","kind":13,"containerName":"_date_time","line":1063},{"definition":"my","localvar":"my","kind":13,"containerName":"_date_time","name":"$formatted_date_time","line":1064},{"name":"$mon","containerName":"_date_time","kind":13,"line":1065},{"line":1066,"name":"$mday","kind":13,"containerName":"_date_time"},{"name":"$year","kind":13,"containerName":"_date_time","line":1067},{"name":"$hour","kind":13,"containerName":"_date_time","line":1069},{"line":1070,"name":"$min","containerName":"_date_time","kind":13},{"kind":13,"containerName":"_date_time","name":"$sec","line":1071},{"name":"$formatted_date_time","kind":13,"containerName":"_date_time","line":1073}],"detail":"($self)","definition":"sub","kind":12,"range":{"start":{"line":1061,"character":0},"end":{"line":1074,"character":9999}},"line":1061,"signature":{"label":"_date_time($self)","parameters":[{"label":"$self"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent 
Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : Bio::Assembly::Contig\n\n\nsub _redundancy {\n    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped\n    # consensus + number of gaps in aligned reads that are also in the consensus ) )\n    # / length of ungapped consensus\n    my ($self, $contigobj) = @_;\n    my $redundancy = 0;\n    \n    # sum of all aligned read lengths\n    my $read_tot = 0;\n    for my $readobj ( $contigobj->each_seq ) {\n        my $read_length = length($readobj->seq);\n        $read_tot += $read_length;\n    
}\n    $redundancy += $read_tot;\n    \n    # - respected gaps\n    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;\n    my @consensus_gaps = ();\n    $contigobj->_register_gaps($consensus_sequence, \\@consensus_gaps);\n    my $respected_gaps = scalar(@consensus_gaps);\n    if ($respected_gaps > 0) {\n        my @cons_arr = split //, $consensus_sequence;\n        for my $gap_pos_cons ( @consensus_gaps ) {\n            for my $readobj ( $contigobj->each_seq ) {\n                my $readid = $readobj->id;\n                my $read_start = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->start);\n                my $read_end   = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->end  );\n                # skip this if consensus gap position not within in the read boundaries\n                next if ( ($gap_pos_cons < $read_start)\n                    || ($gap_pos_cons > $read_end) );\n                # does the read position have read have a gap?\n                my @read_arr = split //, $readobj->seq;                \n                my $gap_pos_read = $contigobj->change_coord(\n                    'gapped consensus', \"aligned $readid\", $gap_pos_cons);\n                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {\n                    $respected_gaps++;\n                }\n            }\n        }\n    }\n    $redundancy -= $respected_gaps;\n    \n    # / length of ungapped consensus\n    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));\n    $redundancy /= $contig_length;\n    \n    return $redundancy;\n}\n\n=head2 _ungap\n\n    Title   : _ungap\n    Usage   : my $ungapped = $ass_io->_ungap($gapped)\n    Function: Remove the gaps from a sequence. 
Gaps are - in TIGR Assembler\n    Returns : string\n    Args    : string\n\n\nsub _ungap {\n    my ($self, $seq_string) = @_;\n    $seq_string =~ s/-//g;\n    return $seq_string;\n}\n\n=head2 _date_time\n\n    Title   : _date_time\n    Usage   : my $timepoint = $ass_io->date_time\n    Function: Get date and time (MM//DD/YY HH:MM:SS)\n    Returns : string\n    Args    : none"}},{"detail":"($self,$id)","definition":"sub","name":"_split_seq_name_and_db","containerName":"main::","children":[{"line":1087,"name":"$self","localvar":"my","containerName":"_split_seq_name_and_db","kind":13,"definition":"my"},{"name":"$id","kind":13,"containerName":"_split_seq_name_and_db","line":1087},{"containerName":"_split_seq_name_and_db","localvar":"my","kind":13,"name":"$seq_name","line":1088,"definition":"my"},{"line":1089,"localvar":"my","kind":13,"containerName":"_split_seq_name_and_db","name":"$db","definition":"my"},{"containerName":"_split_seq_name_and_db","kind":13,"name":"$id","line":1090},{"name":"$db","kind":13,"containerName":"_split_seq_name_and_db","line":1091},{"line":1092,"kind":13,"containerName":"_split_seq_name_and_db","name":"$seq_name"},{"line":1094,"name":"$seq_name","containerName":"_split_seq_name_and_db","kind":13},{"line":1094,"name":"$id","containerName":"_split_seq_name_and_db","kind":13},{"line":1096,"name":"$seq_name","kind":13,"containerName":"_split_seq_name_and_db"},{"name":"$db","kind":13,"containerName":"_split_seq_name_and_db","line":1096}],"signature":{"label":"_split_seq_name_and_db($self,$id)","parameters":[{"label":"$self"},{"label":"$id"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default 
format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : Bio::Assembly::Contig\n\n\nsub _redundancy {\n    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped\n    # consensus + number of gaps in aligned reads that are also in the consensus ) )\n    # / length of ungapped consensus\n    my ($self, $contigobj) = @_;\n    my $redundancy = 0;\n    \n    # sum of all aligned read lengths\n    my $read_tot = 0;\n    for my $readobj ( $contigobj->each_seq ) {\n        my $read_length = length($readobj->seq);\n        $read_tot += $read_length;\n    
}\n    $redundancy += $read_tot;\n    \n    # - respected gaps\n    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;\n    my @consensus_gaps = ();\n    $contigobj->_register_gaps($consensus_sequence, \\@consensus_gaps);\n    my $respected_gaps = scalar(@consensus_gaps);\n    if ($respected_gaps > 0) {\n        my @cons_arr = split //, $consensus_sequence;\n        for my $gap_pos_cons ( @consensus_gaps ) {\n            for my $readobj ( $contigobj->each_seq ) {\n                my $readid = $readobj->id;\n                my $read_start = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->start);\n                my $read_end   = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->end  );\n                # skip this if consensus gap position not within in the read boundaries\n                next if ( ($gap_pos_cons < $read_start)\n                    || ($gap_pos_cons > $read_end) );\n                # does the read position have read have a gap?\n                my @read_arr = split //, $readobj->seq;                \n                my $gap_pos_read = $contigobj->change_coord(\n                    'gapped consensus', \"aligned $readid\", $gap_pos_cons);\n                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {\n                    $respected_gaps++;\n                }\n            }\n        }\n    }\n    $redundancy -= $respected_gaps;\n    \n    # / length of ungapped consensus\n    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));\n    $redundancy /= $contig_length;\n    \n    return $redundancy;\n}\n\n=head2 _ungap\n\n    Title   : _ungap\n    Usage   : my $ungapped = $ass_io->_ungap($gapped)\n    Function: Remove the gaps from a sequence. 
Gaps are - in TIGR Assembler\n    Returns : string\n    Args    : string\n\n\nsub _ungap {\n    my ($self, $seq_string) = @_;\n    $seq_string =~ s/-//g;\n    return $seq_string;\n}\n\n=head2 _date_time\n\n    Title   : _date_time\n    Usage   : my $timepoint = $ass_io->date_time\n    Function: Get date and time (MM//DD/YY HH:MM:SS)\n    Returns : string\n    Args    : none\n\n\nsub _date_time {\n    my ($self) = @_;\n    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);\n    my $formatted_date_time = \n        sprintf('%02d', $mon+1).'/'.\n        sprintf('%02d', $mday).'/'.\n        sprintf('%02d', $year % 100).\n        ' '.\n        sprintf('%02d', $hour).':'.\n        sprintf('%02d', $min).':'.\n        sprintf('%02d',$sec)\n    ;\n    return $formatted_date_time;\n}\n\n=head2 _split_seq_name_and_db\n\n    Title   : _split_seq_name_and_db\n    Usage   : my ($seqname, $db) = $ass_io->_split_seq_name_and_db($id)\n    Function: Extract seq_name and db from sequence id\n    Returns : seq_name, db\n    Args    : 
id"},"kind":12,"range":{"end":{"character":9999,"line":1097},"start":{"line":1086,"character":0}},"line":1086},{"detail":"($self,$seq_name,$db)","definition":"sub","name":"_merge_seq_name_and_db","containerName":"main::","children":[{"definition":"my","name":"$self","containerName":"_merge_seq_name_and_db","localvar":"my","kind":13,"line":1110},{"line":1110,"name":"$seq_name","kind":13,"containerName":"_merge_seq_name_and_db"},{"line":1110,"name":"$db","kind":13,"containerName":"_merge_seq_name_and_db"},{"name":"$id","kind":13,"localvar":"my","containerName":"_merge_seq_name_and_db","line":1111,"definition":"my"},{"line":1112,"kind":13,"containerName":"_merge_seq_name_and_db","name":"$db"},{"name":"$id","kind":13,"containerName":"_merge_seq_name_and_db","line":1113},{"line":1113,"containerName":"_merge_seq_name_and_db","kind":13,"name":"$db"},{"kind":13,"containerName":"_merge_seq_name_and_db","name":"$seq_name","line":1113},{"line":1115,"kind":13,"containerName":"_merge_seq_name_and_db","name":"$id"},{"line":1115,"name":"$seq_name","kind":13,"containerName":"_merge_seq_name_and_db"},{"kind":13,"containerName":"_merge_seq_name_and_db","name":"$id","line":1117}],"signature":{"parameters":[{"label":"$self"},{"label":"$seq_name"},{"label":"$db"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on 
contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has 
an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. 
>my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCG
CCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : Bio::Assembly::Contig\n\n\nsub _redundancy {\n    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped\n    # consensus + number of gaps in aligned reads that are also in the consensus ) )\n    # / length of ungapped consensus\n    my ($self, $contigobj) = @_;\n    my $redundancy = 0;\n    \n    # sum of all aligned read lengths\n    my $read_tot = 0;\n    for my $readobj ( $contigobj->each_seq ) {\n        my $read_length = length($readobj->seq);\n        $read_tot += $read_length;\n    
}\n    $redundancy += $read_tot;\n    \n    # - respected gaps\n    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;\n    my @consensus_gaps = ();\n    $contigobj->_register_gaps($consensus_sequence, \\@consensus_gaps);\n    my $respected_gaps = scalar(@consensus_gaps);\n    if ($respected_gaps > 0) {\n        my @cons_arr = split //, $consensus_sequence;\n        for my $gap_pos_cons ( @consensus_gaps ) {\n            for my $readobj ( $contigobj->each_seq ) {\n                my $readid = $readobj->id;\n                my $read_start = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->start);\n                my $read_end   = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->end  );\n                # skip this if consensus gap position not within in the read boundaries\n                next if ( ($gap_pos_cons < $read_start)\n                    || ($gap_pos_cons > $read_end) );\n                # does the read position have read have a gap?\n                my @read_arr = split //, $readobj->seq;                \n                my $gap_pos_read = $contigobj->change_coord(\n                    'gapped consensus', \"aligned $readid\", $gap_pos_cons);\n                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {\n                    $respected_gaps++;\n                }\n            }\n        }\n    }\n    $redundancy -= $respected_gaps;\n    \n    # / length of ungapped consensus\n    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));\n    $redundancy /= $contig_length;\n    \n    return $redundancy;\n}\n\n=head2 _ungap\n\n    Title   : _ungap\n    Usage   : my $ungapped = $ass_io->_ungap($gapped)\n    Function: Remove the gaps from a sequence. 
Gaps are - in TIGR Assembler\n    Returns : string\n    Args    : string\n\n\nsub _ungap {\n    my ($self, $seq_string) = @_;\n    $seq_string =~ s/-//g;\n    return $seq_string;\n}\n\n=head2 _date_time\n\n    Title   : _date_time\n    Usage   : my $timepoint = $ass_io->date_time\n    Function: Get date and time (MM//DD/YY HH:MM:SS)\n    Returns : string\n    Args    : none\n\n\nsub _date_time {\n    my ($self) = @_;\n    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);\n    my $formatted_date_time = \n        sprintf('%02d', $mon+1).'/'.\n        sprintf('%02d', $mday).'/'.\n        sprintf('%02d', $year % 100).\n        ' '.\n        sprintf('%02d', $hour).':'.\n        sprintf('%02d', $min).':'.\n        sprintf('%02d',$sec)\n    ;\n    return $formatted_date_time;\n}\n\n=head2 _split_seq_name_and_db\n\n    Title   : _split_seq_name_and_db\n    Usage   : my ($seqname, $db) = $ass_io->_split_seq_name_and_db($id)\n    Function: Extract seq_name and db from sequence id\n    Returns : seq_name, db\n    Args    : id\n\n\nsub _split_seq_name_and_db {\n    my ($self, $id) = @_;\n    my $seq_name = '';\n    my $db       = '';\n    if ($id =~ m/(\\S+)\\|(\\S+)/) {\n        $db       = $1;\n        $seq_name = $2;\n    } else {\n        $seq_name = $id;\n    }\n    return ($seq_name, $db);\n}\n\n=head2 _merge_seq_name_and_db\n\n    Title   : _merge_seq_name_and_db\n    Usage   : my $id = $ass_io->_merge_seq_name_and_db($seq_name, $db)\n    Function: Construct id from seq_name and db\n    Returns : id\n    Args    : seq_name, db","label":"_merge_seq_name_and_db($self,$seq_name,$db)"},"range":{"start":{"character":0,"line":1109},"end":{"line":1118,"character":9999}},"kind":12,"line":1109},{"signature":{"label":"_coord($self,$readobj,$contigobj)","parameters":[{"label":"$self"},{"label":"$readobj"},{"label":"$contigobj"}],"documentation":"__END__\n# $Id: tigr.pm 16123 2009-09-17 12:57:27Z cjfields $\n#\n# BioPerl module for 
Bio::Assembly::IO::tigr\n#\n# Copyright by Florent Angly\n#\n# You may distribute this module under the same terms as Perl itself\n#\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR\nAssembler v2 default format.\n\n=head1 SYNOPSIS\n\n    # Building an input stream\n    use Bio::Assembly::IO;\n\n    # Assembly loading methods\n    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',\n                                        -format => 'tigr' );\n    my $scaffold = $asmio->next_assembly;\n\n    # Do some things on contigs...\n\n    # Assembly writing methods\n    my $outasm = Bio::Assembly::IO->new( -file   => \">SGC0-modified.tasm\",\n                                         -format => 'tigr' );\n    $outasm->write_assembly( -scaffold => $assembly,\n                             -singlets => 1 );\n\n=head1 DESCRIPTION\n\nThis package loads and writes assembly information in/from files in the default\nTIGR Assembler v2 format. The files are lassie-formatted and often have the\n.tasm extension. This module was written to be used as a driver module for\nBio::Assembly::IO input/output.\n\n=head2 Implementation\n\nAssemblies are loaded into Bio::Assembly::Scaffold objects composed of\nBio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads\nand contig gapped consensus can be obtained in the tasm files, only\naligned/gapped sequences are added to the different BioPerl objects.\n\nAdditional assembly information is stored as features. 
Contig objects have\nSeqFeature information associated with the primary_tag:\n\n    _main_contig_feature:$contig_id -> misc contig information\n    _quality_clipping:$read_id      -> quality clipping position\n\nRead objects have sub_seqFeature information associated with the\nprimary_tag:\n\n    _main_read_feature:$read_id     -> misc read information\n\nSinglets are considered by TIGR Assembler as contigs of one sequence and are\nrepresented here with features having these primary_tag: \n\n    _main_contig_feature:$contig_id\n    _quality_clipping:$read_primary_id\n    _main_read_feature:$read_primary_id\n    _aligned_coord:$read_primary_id\n\n=head1 THE TIGR TASM LASSIEFORMAT\n\n=head2 Description\n\nIn the TIGR tasm lassie format, contigs are separated by a line containing a single\npipe character \"|\", whereas the reads in a contig are separated by a blank line.\nSinglets can be present in the file and are represented as a contig\ncomposed of a single sequence.\n\nOther than the two above-mentioned separators, each line has an attribute name,\nfollowed a tab and then an attribute value.\n\nThe tasm format is used by more TIGR applications than just TIGR Assembler.\nSome of the attributes are not used by TIGR Assembler or have constant values.\nThey are indicated by an asterisk *\n\nContigs have the following attributes:\n\n    asmbl_id   -> contig ID\n    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)\n    lsequence  -> gapped consensus sequence (lowercase ambiguities)\n    quality    -> gapped consensus quality score (in hexadecimal)\n    seq_id     -> *\n    com_name   -> *\n    type       -> *\n    method     -> always 'asmg' *\n    ed_status  -> *\n    redundancy -> fold coverage of the contig consensus\n    perc_N     -> percent of ambiguities in the contig consensus\n    seq#       -> number of sequences in the contig\n    full_cds   -> *\n    cds_start  -> start of coding sequence *\n    cds_end    -> end of coding 
sequence *\n    ed_pn      -> name of editor (always 'GRA') *\n    ed_date    -> date and time of edition\n    comment    -> some comments *\n    frameshift -> *\n\nEach read has the following attributes:\n\n    seq_name  -> read name\n    asm_lend  -> position of first base on contig ungapped consensus sequence\n    asm_rend  -> position of last base on contig ungapped consensus sequence\n    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)\n    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)\n    best      -> always '0' *\n    comment   -> some comments *\n    db        -> database name associated with the sequence (e.g. >my_db|seq1234)\n    offset    -> offset of the sequence (gapped consensus coordinates)\n    lsequence -> aligned read sequence (ambiguities are uppercase)\n\nWhen asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but\nits reverse complement is shown in the aligned sequence of the assembly file,\nnot the original read.\n\nAmbiguities are reflected in the contig consensus sequence as\nlowercase IUPAC characters: a c g t u m r w s y k x n . 
In the read\nsequences, however, ambiguities are uppercase: M R W S Y K X N\n\n=head2 Example\n\nExample of a contig containing three sequences:\n\n    sequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    
quality\t0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0
B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B\n    asmbl_id\t93\n    seq_id\t\n    com_name\t\n    type\t\n    method\tasmg\n    ed_status\t\n    redundancy\t1.11\n    perc_N\t0.20\n    seq#\t3\n    full_cds\t\n    cds_start\t\n    cds_end\t\n    ed_pn\tGRA\n    ed_date\t08/16/07 17:10:12\n    comment\t\n    frameshift\t\n\n    seq_name\tSDSU_RFPERU_010_C09.x01.phd.1\n    asm_lend\t1\n    asm_rend\t4423\n    seq_lend\t1\n    seq_rend\t442\n    best\t0\n    comment\t\n    db\t\n    offset\t0\n    lsequence\tCGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG\n\n    
seq_name\tSDSU_RFPERU_002_H12.x01.phd.1\n    asm_lend\t339\n    asm_rend\t940\n    seq_lend\t1\n    seq_rend\t602\n    best\t0\n    comment\t\n    db\t\n    offset\t338\n    lsequence\tCGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC\n\n    seq_name\tSDSU_RFPERU_009_E07.x01.phd.1\n    asm_lend\t880\n    asm_rend\t1520\n    seq_lend\t641\n    seq_rend\t1\n    best\t0\n    comment\t\n    db\t\n    offset\t8803\n    lsequence\tCGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT\n    |\n\n...\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules. 
Send your comments and suggestions preferably to the\nBioperl mailing lists  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the BioPerl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via email\nor the web:\n\n  bioperl-bugs@bio.perl.org\n  http://bugzilla.bioperl.org/\n\n=head1 AUTHOR - Florent E Angly\n\nEmail florent dot angly at gmail dot com\n\n=head1 APPENDIX\n\nThe rest of the documentation details each of the object\nmethods. 
Internal methods are usually preceded with a \"_\".\n\n\npackage Bio::Assembly::IO::tigr;\n\nuse strict;\nuse Bio::Seq::Quality;\nuse Bio::LocatableSeq;\nuse Bio::Assembly::IO;\nuse Bio::Assembly::Scaffold;\nuse Bio::Assembly::Contig;\nuse Bio::Assembly::Singlet;\n\nuse base qw(Bio::Assembly::IO);\n\nmy $progname = 'TIGR Assembler';\n\n=head2 next_assembly\n\n Title   : next_assembly\n Usage   : my $scaffold = $asmio->next_assembly()\n Function: return the next assembly in the tasm-formatted stream\n Returns : Bio::Assembly::Scaffold object\n Args    : none\n\n\nsub next_assembly {\n    my $self = shift; # object reference\n    \n    # Create a new scaffold to hold the contigs\n    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);\n    \n    # Contig and read related\n    my $contigobj;\n    my $iscontig = 1;\n    my %contiginfo;\n    my $isread = 0;\n    my %readinfo;\n    \n    # Loop over all assembly file lines\n    while ($_ = $self->_readline) {\n        chomp;\n        if ( /^\\|/ ) {  # a line with a single pipe |\n            # The end of a read from a contig, the start of a new contig\n            $iscontig = 1;\n            $isread   = 0;\n            # Store read info\n            if ($contiginfo{'seqnum'} > 1) {\n                # This is a read in a contig\n                my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n            } elsif ($contiginfo{'seqnum'} == 1) {\n                # This is a singlet\n                my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                    $scaffoldobj);\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n            # Clear read info\n            undef %readinfo;\n            # Clear contig info\n            undef $contigobj;\n            undef %contiginfo;\n        } elsif ( /^$/ ) {  # a blank line\n            if ($iscontig) {\n                # The end of a 
contig, the start of a read in that contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store contig info\n                $contigobj = $self->_store_contig( \\%contiginfo, $contigobj,\n                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;\n            } elsif ($isread) {\n                # The end of read in a contig, the start of a new one in\n                # the same contig\n                $iscontig = 0;\n                $isread   = 1;\n                # Store read info\n                if ($contiginfo{'seqnum'} > 1) {\n                    # This is a read in a contig\n                    my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n                } elsif ($contiginfo{'seqnum'} == 1) {\n                    # This is a singlet\n                    my $singletobj = $self->_store_singlet(\\%readinfo,\n                        \\%contiginfo, $scaffoldobj);\n                } else {\n                  # That should not happen\n                  $self->throw(\"Unhandled exception\");\n                }\n                # Clear read info\n                undef %readinfo;\n            } else {\n                # That should not happen\n                $self->throw(\"Unhandled exception\");\n            }\n        } else {\n            if ($iscontig) {\n                # Parse contig\n                if    (/^sequence\\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}\n                elsif (/^lsequence\\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}\n                elsif (/^quality\\t(.*)/)      {$contiginfo{'quality'}    = $1; next}\n                elsif (/^asmbl_id\\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}\n                elsif (/^seq_id\\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}\n                elsif (/^com_name\\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}\n                elsif (/^type\\t(.*)/)         {$contiginfo{'type'}       = $1; next}\n                elsif 
(/^method\\t(.*)/)       {$contiginfo{'method'}     = $1; next}\n                elsif (/^ed_status\\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}\n                elsif (/^redundancy\\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}\n                elsif (/^perc_N\\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}\n                elsif (/^seq\\#\\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}\n                elsif (/^full_cds\\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}\n                elsif (/^cds_start\\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}\n                elsif (/^cds_end\\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}\n                elsif (/^ed_pn\\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}\n                elsif (/^ed_date\\t(.*\\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}\n                elsif (/^comment\\t(.*)/)      {$contiginfo{'comment'}    = $1; next}\n                elsif (/^frameshift\\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } elsif ($isread) {\n                # Parse read info\n                if    (/^seq_name\\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}\n                elsif (/^asm_lend\\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}\n                elsif (/^asm_rend\\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}\n                elsif (/^seq_lend\\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}\n                elsif (/^seq_rend\\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}\n                elsif (/^best\\t(.*)/)      {$readinfo{'best'}      = $1; next}\n                elsif (/^comment\\t(.*)/)   {$readinfo{'comment'}   = $1; next}\n                elsif (/^db\\t(.*)/)        {$readinfo{'db'}        = $1; next}\n                elsif (/^offset\\t(.*)/)    
{$readinfo{'offset'}    = $1; next}\n                elsif (/^lsequence\\t(.*)/) {$readinfo{'lsequence'} = $1; next}\n                else {\n                    $self->throw(\"Format unknown at line $.:\\n$_\\nIs your file\".\n                        \" really a TIGR Assembler tasm-formatted file?\");\n                }\n            } else {\n                # That shouldn't happen\n                $self->throw(\"Unhandled exception\");                \n            }\n        }\n    }\n    # Store read info for last read\n    if (defined $contiginfo{'seqnum'}) {\n        if ($contiginfo{'seqnum'} > 1) {\n            # This is a read in a contig\n            my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n        } elsif ($contiginfo{'seqnum'} == 1) {\n            # This is a singlet\n            my $singletobj = $self->_store_singlet(\\%readinfo, \\%contiginfo,\n                $scaffoldobj);\n        } else {\n            # That should not happen\n            $self->throw(\"Unhandled exception\");\n        }\n    }\n    # Clear read info for last read\n    undef %readinfo;\n    # Clear contig info for last contig\n    undef $contigobj;\n    undef %contiginfo;\n    \n    $scaffoldobj->update_seq_list();\n    \n    return $scaffoldobj;\n}\n\n=head2 _qual_hex2dec\n\n    Title   : _qual_hex2dec\n    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);\n    Function: convert an hexadecimal quality score into a decimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_hex2dec {\n    my ($self, $qual) = @_;\n    $qual =~ s/^0x(.*)$/$1/;\n    $qual =~ s/(..)/hex($1).' 
'/eg;\n    return $qual;\n}\n\n=head2 _qual_dec2hex\n\n    Title   : _qual_dec2hex\n    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);\n    Function: convert a decimal quality score into an hexadecimal quality score \n    Returns : string\n    Args    : string\n\n\nsub _qual_dec2hex {\n    my ($self, $qual) = @_;\n    $qual =~ s/(\\d+)\\s*/sprintf('%02X', $1)/eg;\n    $qual = '0x'.$qual;\n    return $qual;\n}\n\n=head2 _store_contig\n\n    Title   : _store_contig\n    Usage   : my $contigobj; $contigobj = $self->_store_contig(\n              \\%contiginfo, $contigobj, $scaffoldobj);\n    Function: store information of a contig belonging to a scaffold in the\n              appropriate object\n    Returns : Bio::Assembly::Contig object\n    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold\n\n\nsub _store_contig {\n    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;\n\n    # Create a contig and attach it to scaffold\n    $contigobj = Bio::Assembly::Contig->new(\n        -id     => $$contiginfo{'asmbl_id'},\n        -source => $progname,\n        -strand => 1\n    );\n    $scaffoldobj->add_contig($contigobj);\n\n    # Create a gapped consensus sequence and attach it to contig\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $consensus = Bio::LocatableSeq->new(\n        -id    => $$contiginfo{'asmbl_id'},\n        -seq   => $$contiginfo{'lsequence'},\n        -start => 1,\n    );\n    $contigobj->set_consensus_sequence($consensus);\n\n    # Create an gapped consensus quality score and attach it to contig\n    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});\n    my $qual = Bio::Seq::Quality->new(\n        -id   => $$contiginfo{'asmbl_id'},\n        -qual => $$contiginfo{'quality'}\n    );\n    $contigobj->set_consensus_quality($qual);\n\n    # Add other misc contig information as features of the contig\n    my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => 
\"_main_contig_feature:$$contiginfo{'asmbl_id'}\",\n        -start       => 1,\n        -end         => $contigobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n    );\n    $contigobj->add_features([ $contigtags ], 1);\n\n    return $contigobj;\n}\n\n=head2 _store_read\n\n    Title   : _store_read\n    Usage   : my $readobj = $self->_store_read(\\%readinfo, $contigobj);\n    Function: store information of a read belonging to a contig in the appropriate object\n    Returns : Bio::LocatableSeq\n    Args    : hash, Bio::Assembly::Contig\n\n\nsub _store_read {\n   my ($self, $readinfo, $contigobj) = @_;\n\n   # Create an aligned read object\n   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});\n   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);\n   my $readobj = Bio::LocatableSeq->new(\n       # the ids of sequence objects are supposed to include the db name in it, i.e. 
\"big_db|seq1234\"\n       # that's how sequence ids coming from the fasta parser are at least\n       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),\n       -seq        => $$readinfo{'lsequence'},      \n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna'\n   );\n\n   # Add read location and sequence to contig (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => $readobj->id,\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigobj->id() }\n   );\n   $contigobj->set_seq_coord($alncoord, $readobj);\n\n   # Add quality clipping read information in contig features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});\n   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_quality_clipping:'.$readobj->id,\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'}\n   );\n   $clipcoord->attach_seq($readobj);\n   $contigobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => '_main_read_feature:'.$readobj->id,\n      
 -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n\n   return $readobj;\n}\n\n=head2 _store_singlet\n\n    Title   : _store_singlet\n    Usage   : my $singletobj = $self->_store_read(\\%readinfo, \\%contiginfo,\n                  $scaffoldobj);\n    Function: store information of a singlet belonging to a scaffold in the appropriate object\n    Returns : Bio::Assembly::Singlet\n    Args    : hash, hash, Bio::Assembly::Scaffold\n\n\nsub _store_singlet {\n    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;\n    # Singlets in TIGR_Assembler are represented as a contig of one sequence\n    # We try to simulate this duality by playing around with the Singlet object\n    \n    my $contigid = $$contiginfo{'asmbl_id'};\n    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});\n    \n    # Create a sequence object\n    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});\n    my $seqobj = Bio::Seq::Quality->new(\n       -primary_id => $contigid, # unique id in assembly (contig name)\n       -display_id => $readid,\n       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase\n       -start      => 1,\n       -strand     => $$readinfo{'strand'},\n       -alphabet   => 'dna',\n       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    \n   );\n\n   # Create singlet from sequence and add it to scaffold\n   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );\n   $scaffoldobj->add_singlet($singletobj);\n\n   # Add other misc contig information as features of the singlet\n   my $contigtags = Bio::SeqFeature::Generic->new(\n        -primary_tag => \"_main_contig_feature:$contigid\",\n        -start       => 1,\n   
     -end         => $singletobj->get_consensus_length(),\n        -strand      => 1,\n        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},\n                          'com_name'   => $$contiginfo{'com_name'},\n                          'type'       => $$contiginfo{'type'},\n                          'method'     => $$contiginfo{'method'},\n                          'ed_status'  => $$contiginfo{'ed_status'},\n                          'full_cds'   => $$contiginfo{'full_cds'},\n                          'cds_start'  => $$contiginfo{'cds_start'},\n                          'cds_end'    => $$contiginfo{'cds_end'},\n                          'ed_pn'      => $$contiginfo{'ed_pn'},\n                          'ed_date'    => $$contiginfo{'ed_date'},\n                          'comment'    => $$contiginfo{'comment'},\n                          'frameshift' => $$contiginfo{'frameshift'} }\n   );\n   $singletobj->add_features([ $contigtags ], 1);\n\n   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)\n   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates\n   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq\n\n   my $alncoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_aligned_coord:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $alncoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $alncoord ], 0);\n\n   # Add quality clipping read information in singlet features\n   # (from 'aligned read' to 'gapped consensus' coordinates)\n   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};\n   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};\n   my $clipcoord = Bio::SeqFeature::Generic->new(\n       -primary_tag => 
\"_quality_clipping:$readid\",\n       -start       => $$readinfo{'clip_start'},\n       -end         => $$readinfo{'clip_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'contig' => $contigid }\n   );\n   $clipcoord->attach_seq($singletobj->seqref);\n   $singletobj->add_features([ $clipcoord ], 0);\n   \n   # Add other misc read information as subsequence feature\n   my $readtags = Bio::SeqFeature::Generic->new(\n       -primary_tag => \"_main_read_feature:$readid\",\n       -start       => $$readinfo{'aln_start'},\n       -end         => $$readinfo{'aln_end'},\n       -strand      => $$readinfo{'strand'},\n       -tag         => { 'best'    => $$readinfo{'best'},\n                         'comment' => $$readinfo{'comment'} }\n   );\n   $alncoord->add_sub_SeqFeature($readtags);\n      \n   return $singletobj;\n}\n\n=head2 write_assembly\n\n    Title   : write_assembly\n    Usage   : $ass_io->write_assembly($assembly)\n    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  \n              format\n    Returns : 1 on success, 0 for error\n    Args    : A Bio::Assembly::Scaffold object\n\n\nsub write_assembly {\n    my ($self,@args) = @_;    \n    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);\n    \n    # Sanity check\n    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {\n        $self->warn(\"Must provide a Bio::Align::AlignI object when calling\n            write_assembly\");\n        next;\n    }\n\n    # Get list of objects - contigs and singlets\n    my @cont_ids = $scaffoldobj->get_contig_ids;\n    my @sing_ids = $scaffoldobj->get_singlet_ids;\n    my %did;\n    my $decimal_format = '%.2f';\n    for (my $i = 0; $i < scalar @sing_ids ; $i++) {\n      # singlet display id (string)\n      my $display_id = $sing_ids[$i];\n      # singlet primary id (unique, numerical)\n      my $primary_id = 
$scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;\n      $sing_ids[$i] = $primary_id;\n      $did{$primary_id} = $display_id;\n    }\n    my @ids = (@cont_ids, @sing_ids);\n    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id\n    my $numobj = scalar @ids;\n\n    # Output all contigs and singlets (sorted by increasing id number)\n    for (my $i = 0 ; $i < $numobj ; $i++) {\n        \n        my $objid = $ids[$i];\n        \n        if (defined $did{$objid}) { \n            # This is a singlet\n            next unless ($singlets);\n\n            my $contigid = $objid;\n            my $readid   = $did{$objid};            \n            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $singletobj->seqref->seq;\n            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};\n            $contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$singletobj->seqref->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   \n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = 1;\n      
      $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n\n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n            \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                
\"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n                        \n            # Get read information\n            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);\n            my $clipcoord = (grep\n                { $_->primary_tag eq \"_quality_clipping:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $alncoord  = (grep\n                { $_->primary_tag eq \"_aligned_coord:$readid\"}\n                $singletobj->get_features_collection->get_all_features\n            )[0];\n            my $readanno = (grep\n                { $_->primary_tag eq \"_main_read_feature:$readid\" }\n                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures\n            )[0];\n            my %readinfo;\n            $readinfo{'seq_name'}  = $seq_name;\n            $readinfo{'asm_lend'}  = $alncoord->location->start;\n            $readinfo{'asm_rend'}  = $alncoord->location->end;\n            $readinfo{'seq_lend'}  = $clipcoord->location->start;\n            $readinfo{'seq_rend'}  = $clipcoord->location->end;\n            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n            $readinfo{'comment'}   = 
($readanno->get_tag_values('comment'))[0];\n            $readinfo{'db'}        = $db;         \n            $readinfo{'offset'}    = 0;\n            # ambiguities in read sequence are uppercase\n            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});\n            \n            # Check that no tag value is undef\n            $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n\n            # Print read information\n            $self->_print(\n                \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                \"best\\t$readinfo{'best'}\\n\".\n                \"comment\\t$readinfo{'comment'}\\n\".\n                \"db\\t$readinfo{'db'}\\n\".\n                \"offset\\t$readinfo{'offset'}\\n\".\n                \"lsequence\\t$readinfo{'lsequence'}\\n\"\n            );\n            if ($i+1 < $numobj) {\n                $self->_print(\"|\\n\");\n            }\n        } else {\n            # This is a contig\n            my $contigid = $objid;\n            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);\n\n            # Skip contigs of 1 sequence (singlets) if needed\n            next if ($contigobj->num_sequences == 1) && (!$singlets);\n            \n            # Get contig information\n            my $contanno = (grep\n                { $_->primary_tag eq \"_main_contig_feature:$contigid\" }\n                $contigobj->get_features_collection->get_all_features\n            )[0];\n            my %contiginfo;\n            $contiginfo{'sequence'}   = $self->_ungap(\n                $contigobj->get_consensus_sequence->seq);\n            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;\n            
$contiginfo{'quality'}    = $self->_qual_dec2hex(\n                join ' ', @{$contigobj->get_consensus_quality->qual});\n            $contiginfo{'asmbl_id'}   = $contigid;\n            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];\n            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];\n            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];\n            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];\n            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];\n            $contiginfo{'redundancy'} = sprintf(\n                $decimal_format, $self->_redundancy($contigobj));\n            $contiginfo{'perc_N'}     = sprintf(\n                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));\n            $contiginfo{'seqnum'}     = $contigobj->num_sequences;\n            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];\n            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];\n            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];\n            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];\n            $contiginfo{'ed_date'}    = $self->_date_time;\n            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];\n            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];\n            \n            # Check that no tag value is undef\n            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};\n            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};\n            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};\n            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};\n            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};\n            $contiginfo{'full_cds'}   = '' unless 
defined $contiginfo{'full_cds'};\n            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};\n            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};\n            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};\n            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};\n            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};\n                       \n            # Print contig information\n            $self->_print(\n                \"sequence\\t$contiginfo{'sequence'}\\n\".\n                \"lsequence\\t$contiginfo{'lsequence'}\\n\".\n                \"quality\\t$contiginfo{'quality'}\\n\".\n                \"asmbl_id\\t$contiginfo{'asmbl_id'}\\n\".\n                \"seq_id\\t$contiginfo{'seq_id'}\\n\".\n                \"com_name\\t$contiginfo{'com_name'}\\n\".\n                \"type\\t$contiginfo{'type'}\\n\".\n                \"method\\t$contiginfo{'method'}\\n\".\n                \"ed_status\\t$contiginfo{'ed_status'}\\n\".\n                \"redundancy\\t$contiginfo{'redundancy'}\\n\".\n                \"perc_N\\t$contiginfo{'perc_N'}\\n\".\n                \"seq#\\t$contiginfo{'seqnum'}\\n\".\n                \"full_cds\\t$contiginfo{'full_cds'}\\n\".\n                \"cds_start\\t$contiginfo{'cds_start'}\\n\".\n                \"cds_end\\t$contiginfo{'cds_end'}\\n\".\n                \"ed_pn\\t$contiginfo{'ed_pn'}\\n\".\n                \"ed_date\\t$contiginfo{'ed_date'}\\n\".\n                \"comment\\t$contiginfo{'comment'}\\n\".\n                \"frameshift\\t$contiginfo{'frameshift'}\\n\".\n                \"\\n\"\n            );\n            my $seqno = 0;\n            for my $readobj ( $contigobj->each_seq() ) {\n                $seqno++;\n                \n                # Get read information\n                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);\n                my ($asm_lend, $asm_rend, 
$seq_lend, $seq_rend, $offset)\n                    = $self->_coord($readobj, $contigobj);\n                my $readanno = ( grep \n                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }\n                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures\n                )[0];\n                my %readinfo;                \n                $readinfo{'seq_name'}  = $seq_name;\n                $readinfo{'asm_lend'}  = $asm_lend;\n                $readinfo{'asm_rend'}  = $asm_rend;\n                $readinfo{'seq_lend'}  = $seq_lend;\n                $readinfo{'seq_rend'}  = $seq_rend;                \n                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];\n                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];\n                $readinfo{'db'}        = $db;\n                $readinfo{'offset'}    = $offset;   \n                $readinfo{'lsequence'} = $readobj->seq(); \n                         \n                # Check that no tag value is undef\n                $readinfo{'best'}    = '' unless defined $readinfo{'best'};\n                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};\n    \n                # Print read information\n                $self->_print(\n                    \"seq_name\\t$readinfo{'seq_name'}\\n\".\n                    \"asm_lend\\t$readinfo{'asm_lend'}\\n\".\n                    \"asm_rend\\t$readinfo{'asm_rend'}\\n\".\n                    \"seq_lend\\t$readinfo{'seq_lend'}\\n\".\n                    \"seq_rend\\t$readinfo{'seq_rend'}\\n\".\n                    \"best\\t$readinfo{'best'}\\n\".\n                    \"comment\\t$readinfo{'comment'}\\n\".\n                    \"db\\t$readinfo{'db'}\\n\".\n                    \"offset\\t$readinfo{'offset'}\\n\".\n                    \"lsequence\\t$readinfo{'lsequence'}\\n\"\n                );\n                if ($seqno < $contiginfo{'seqnum'}) {\n                    
$self->_print(\"\\n\");\n                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {\n                    $self->_print(\"|\\n\");\n                }\n            }\n        }\n    }\n    return 1;\n}\n\n=head2 _perc_N\n\n    Title   : _perc_N\n    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)\n    Function: Calculate the percent of ambiguities in a sequence.\n              M R W S Y K X N are regarded as ambiguites in an aligned read\n              sequence by TIGR Assembler. In the case of a gapped contig\n              consensus sequence, all lowercase symbols are ambiguities, i.e.:\n              a c g t u m r w s y k x n.\n    Returns : decimal number\n    Args    : string\n\n\nsub _perc_N {\n    my ($self, $seq_string) = @_;\n    $self->throw(\"Cannot accept an empty sequence\") if length($seq_string) == 0;\n    my $perc_N = 0;\n    for my $base ( split //, $seq_string ) {\n        # individual base matches an ambiguity?\n        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {\n            $perc_N++;\n        }\n    }\n    $perc_N = $perc_N * 100 / length $seq_string;\n    return $perc_N;\n}\n\n=head2 _redundancy\n\n    Title   : _redundancy\n    Usage   : my $ref = $ass_io->_redundancy($contigobj)\n    Function: Calculate the fold coverage (redundancy) of a contig consensus\n              (average number of read base pairs covering the consensus)\n    Returns : decimal number\n    Args    : Bio::Assembly::Contig\n\n\nsub _redundancy {\n    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped\n    # consensus + number of gaps in aligned reads that are also in the consensus ) )\n    # / length of ungapped consensus\n    my ($self, $contigobj) = @_;\n    my $redundancy = 0;\n    \n    # sum of all aligned read lengths\n    my $read_tot = 0;\n    for my $readobj ( $contigobj->each_seq ) {\n        my $read_length = length($readobj->seq);\n        $read_tot += $read_length;\n    
}\n    $redundancy += $read_tot;\n    \n    # - respected gaps\n    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;\n    my @consensus_gaps = ();\n    $contigobj->_register_gaps($consensus_sequence, \\@consensus_gaps);\n    my $respected_gaps = scalar(@consensus_gaps);\n    if ($respected_gaps > 0) {\n        my @cons_arr = split //, $consensus_sequence;\n        for my $gap_pos_cons ( @consensus_gaps ) {\n            for my $readobj ( $contigobj->each_seq ) {\n                my $readid = $readobj->id;\n                my $read_start = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->start);\n                my $read_end   = $contigobj->change_coord(\n                    \"aligned $readid\", 'gapped consensus', $readobj->end  );\n                # skip this if consensus gap position not within in the read boundaries\n                next if ( ($gap_pos_cons < $read_start)\n                    || ($gap_pos_cons > $read_end) );\n                # does the read position have read have a gap?\n                my @read_arr = split //, $readobj->seq;                \n                my $gap_pos_read = $contigobj->change_coord(\n                    'gapped consensus', \"aligned $readid\", $gap_pos_cons);\n                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {\n                    $respected_gaps++;\n                }\n            }\n        }\n    }\n    $redundancy -= $respected_gaps;\n    \n    # / length of ungapped consensus\n    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));\n    $redundancy /= $contig_length;\n    \n    return $redundancy;\n}\n\n=head2 _ungap\n\n    Title   : _ungap\n    Usage   : my $ungapped = $ass_io->_ungap($gapped)\n    Function: Remove the gaps from a sequence. 
Gaps are - in TIGR Assembler\n    Returns : string\n    Args    : string\n\n\nsub _ungap {\n    my ($self, $seq_string) = @_;\n    $seq_string =~ s/-//g;\n    return $seq_string;\n}\n\n=head2 _date_time\n\n    Title   : _date_time\n    Usage   : my $timepoint = $ass_io->date_time\n    Function: Get date and time (MM//DD/YY HH:MM:SS)\n    Returns : string\n    Args    : none\n\n\nsub _date_time {\n    my ($self) = @_;\n    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);\n    my $formatted_date_time = \n        sprintf('%02d', $mon+1).'/'.\n        sprintf('%02d', $mday).'/'.\n        sprintf('%02d', $year % 100).\n        ' '.\n        sprintf('%02d', $hour).':'.\n        sprintf('%02d', $min).':'.\n        sprintf('%02d',$sec)\n    ;\n    return $formatted_date_time;\n}\n\n=head2 _split_seq_name_and_db\n\n    Title   : _split_seq_name_and_db\n    Usage   : my ($seqname, $db) = $ass_io->_split_seq_name_and_db($id)\n    Function: Extract seq_name and db from sequence id\n    Returns : seq_name, db\n    Args    : id\n\n\nsub _split_seq_name_and_db {\n    my ($self, $id) = @_;\n    my $seq_name = '';\n    my $db       = '';\n    if ($id =~ m/(\\S+)\\|(\\S+)/) {\n        $db       = $1;\n        $seq_name = $2;\n    } else {\n        $seq_name = $id;\n    }\n    return ($seq_name, $db);\n}\n\n=head2 _merge_seq_name_and_db\n\n    Title   : _merge_seq_name_and_db\n    Usage   : my $id = $ass_io->_merge_seq_name_and_db($seq_name, $db)\n    Function: Construct id from seq_name and db\n    Returns : id\n    Args    : seq_name, db\n\n\nsub _merge_seq_name_and_db {\n    my ($self, $seq_name, $db) = @_;\n    my $id = '';\n    if ($db) {\n        $id = $db.'|'.$seq_name;\n    } else {\n        $id = $seq_name;\n    }\n    return $id;\n}\n\n=head2 _coord\n\n    Title   : _coord\n    Usage   : my $id = $ass_io->__coord($readobj, $contigobj)\n    Function: Get different coordinates for the read\n    Returns : number, number, number, number, number\n    
Args    : Bio::Assembly::Seq, Bio::Assembly::Contig"},"range":{"start":{"line":1130,"character":0},"end":{"line":1160,"character":9999}},"kind":12,"line":1130,"detail":"($self,$readobj,$contigobj)","definition":"sub","containerName":"main::","name":"_coord","children":[{"definition":"my","line":1131,"localvar":"my","containerName":"_coord","kind":13,"name":"$self"},{"line":1131,"kind":13,"containerName":"_coord","name":"$readobj"},{"line":1131,"kind":13,"containerName":"_coord","name":"$contigobj"},{"name":"$asm_lend","containerName":"_coord","localvar":"my","kind":13,"line":1132,"definition":"my"},{"line":1132,"name":"$asm_rend","containerName":"_coord","kind":13},{"name":"$seq_lend","containerName":"_coord","kind":13,"line":1132},{"line":1132,"kind":13,"containerName":"_coord","name":"$seq_rend"},{"line":1132,"kind":13,"containerName":"_coord","name":"$offset"},{"line":1136,"localvar":"my","kind":13,"containerName":"_coord","name":"$aln_lend","definition":"my"},{"line":1136,"containerName":"_coord","kind":13,"name":"$contigobj"},{"line":1136,"name":"get_seq_coord","kind":12,"containerName":"_coord"},{"name":"$readobj","kind":13,"containerName":"_coord","line":1136},{"line":1136,"kind":12,"containerName":"_coord","name":"location"},{"name":"start","containerName":"_coord","kind":12,"line":1136},{"name":"$aln_rend","kind":13,"localvar":"my","containerName":"_coord","line":1137,"definition":"my"},{"name":"$contigobj","kind":13,"containerName":"_coord","line":1137},{"line":1137,"containerName":"_coord","kind":12,"name":"get_seq_coord"},{"containerName":"_coord","kind":13,"name":"$readobj","line":1137},{"line":1137,"name":"location","kind":12,"containerName":"_coord"},{"line":1137,"kind":12,"containerName":"_coord","name":"end"},{"name":"$asm_lend","kind":13,"containerName":"_coord","line":1138},{"containerName":"_coord","kind":13,"name":"$contigobj","line":1138},{"line":1138,"kind":12,"containerName":"_coord","name":"change_coord"},{"line":1139,"name":"$aln_lend","con
tainerName":"_coord","kind":13},{"line":1140,"containerName":"_coord","kind":13,"name":"$asm_rend"},{"line":1140,"containerName":"_coord","kind":13,"name":"$contigobj"},{"name":"change_coord","containerName":"_coord","kind":12,"line":1140},{"line":1141,"containerName":"_coord","kind":13,"name":"$aln_rend"},{"definition":"my","localvar":"my","kind":13,"containerName":"_coord","name":"$readclip","line":1145},{"line":1146,"kind":12,"containerName":"_coord","name":"primary_tag"},{"name":"$readobj","containerName":"_coord","kind":13,"line":1146},{"kind":12,"containerName":"_coord","name":"primary_id","line":1146},{"kind":13,"containerName":"_coord","name":"$contigobj","line":1147},{"name":"get_features_collection","containerName":"_coord","kind":12,"line":1147},{"kind":12,"containerName":"_coord","name":"get_all_features","line":1148},{"definition":"my","localvar":"my","kind":13,"containerName":"_coord","name":"$clip_lend","line":1149},{"line":1149,"kind":13,"containerName":"_coord","name":"$readclip"},{"line":1149,"name":"location","containerName":"_coord","kind":12},{"name":"start","containerName":"_coord","kind":12,"line":1149},{"name":"$clip_rend","localvar":"my","containerName":"_coord","kind":13,"line":1150,"definition":"my"},{"line":1150,"containerName":"_coord","kind":13,"name":"$readclip"},{"name":"location","kind":12,"containerName":"_coord","line":1150},{"name":"end","kind":12,"containerName":"_coord","line":1150},{"kind":13,"containerName":"_coord","name":"$seq_lend","line":1151},{"kind":13,"containerName":"_coord","name":"$contigobj","line":1151},{"line":1151,"kind":12,"containerName":"_coord","name":"change_coord"},{"kind":13,"containerName":"_coord","name":"$readobj","line":1152},{"name":"id","kind":12,"containerName":"_coord","line":1152},{"name":"$clip_lend","kind":13,"containerName":"_coord","line":1152},{"containerName":"_coord","kind":13,"name":"$seq_rend","line":1153},{"line":1153,"containerName":"_coord","kind":13,"name":"$contigobj"},{"kind":12,"co
ntainerName":"_coord","name":"change_coord","line":1153},{"line":1154,"name":"$readobj","containerName":"_coord","kind":13},{"name":"id","kind":12,"containerName":"_coord","line":1154},{"containerName":"_coord","kind":13,"name":"$clip_rend","line":1154},{"name":"$offset","containerName":"_coord","kind":13,"line":1157},{"kind":13,"containerName":"_coord","name":"$aln_lend","line":1157},{"line":1159,"name":"$asm_lend","kind":13,"containerName":"_coord"},{"line":1159,"kind":13,"containerName":"_coord","name":"$asm_rend"},{"line":1159,"kind":13,"containerName":"_coord","name":"$seq_lend"},{"line":1159,"containerName":"_coord","kind":13,"name":"$seq_rend"},{"line":1159,"containerName":"_coord","kind":13,"name":"$offset"}]}]}