"""This script removes quality and read info from a given SAM file """ import sys import re import genome_utils def parse_options(argv): """Parses options from the command line """ from optparse import OptionParser, OptionGroup parser = OptionParser() required = OptionGroup(parser, 'REQUIRED') required.add_option('-i', '--infile', dest='infile', metavar='FILE', help='input file in SAM format', default='-') required.add_option('-o', '--outfile', dest='outfile', metavar='FILE', help='output file in SAM format', default='-') optional = OptionGroup(parser, 'OPTIONAL') optional.add_option('-q', '--quality_only', dest='quality_only', action='store_true', help='removes only quality information from the SAM file', default=False) optional.add_option('-s', '--seq_only', dest='sequence_only', action='store_true', help='removes only sequence information from the SAM file', default=False) optional.add_option('-v', '--verbose', dest='verbose', action='store_true', help='verbosity', default=False) parser.add_option_group(required) parser.add_option_group(optional) (options, args) = parser.parse_args() if len(argv) < 3: parser.print_help() sys.exit(2) return options def main(): options = parse_options(sys.argv) line_counter = 0 outf = open(options.outfile, 'w') for line in open(options.infile, 'r'): line_counter += 1 if options.verbose and (line_counter % 10000) == 0: print '[ line %s ]' % line_counter if line[0] in ['#', '@']: print >> outf, line continue sl = line.strip().split('\t') ### 9 - SEQUENCE if not options.quality_only: if len(sl) > 9: sl[9] = '*' ### 10 - QUALITY if not options.sequence_only: if len(sl) > 10: sl[10] = '*' ### build up line again _line = '' for b in sl: _line += (b+ '\t') print >> outf, _line[:-1] outf.close() if __name__ == '__main__': main()