"""Merge sets of paired reads into one pseudo fastq file.

Several input files can be given - the lists of files containing the pairs
MUST have the same order for first and second in pair!
(-p PAIRED_1_1,PAIRED_2_1,PAIRED_3_1,... -P PAIRED_1_2,PAIRED_2_2,PAIRED_3_2,...)

Every read pair becomes one output line with five tab-separated fields:
read id, first sequence, second sequence, first quality, second quality.
"""

import sys
from optparse import OptionGroup, OptionParser

# Number of processed input lines between two progress messages.
_PROGRESS_INTERVAL = 100000


def parse_options(argv):
    """Parse the command line options.

    Args:
        argv: Full argument vector shaped like ``sys.argv``, i.e. with the
            program name at index 0.

    Returns:
        The optparse values object carrying ``pair_1``, ``pair_2`` and
        ``outfile`` (each defaults to ``'-'`` when not given).

    Prints the help text and exits with status 2 when ``argv`` has fewer
    than four entries.
    """
    parser = OptionParser()
    required = OptionGroup(parser, 'REQUIRED')
    required.add_option(
        '-p', '--pair_first', dest='pair_1', metavar='FILE',
        help='comma separated list of files containing the first reads of the pair',
        default='-')
    # NOTE: '--paor_second' is a misspelling of '--pair_second'; it is kept
    # unchanged so that existing invocations keep working.
    required.add_option(
        '-P', '--paor_second', dest='pair_2', metavar='FILE',
        help='comma separated list of files containing the second reads of the pair',
        default='-')
    required.add_option(
        '-o', '--outfile', dest='outfile', metavar='FILE',
        help='file to store the merged file in pseudo fastq',
        default='-')
    parser.add_option_group(required)

    # Parse the vector we were handed; a bare parse_args() would silently
    # read sys.argv and ignore the argv parameter.
    options, _ = parser.parse_args(argv[1:])

    if len(argv) < 4:
        parser.print_help()
        sys.exit(2)

    return options


def _merge_pair(pair_1, pair_2, outfile, pair_1_file, pair_2_file):
    """Merge one pair of open fastq streams into ``outfile``.

    Both streams are read in lockstep, four lines per record.  Reading stops
    as soon as either stream is exhausted.

    Args:
        pair_1: Readable stream with the first reads of the pair.
        pair_2: Readable stream with the second reads of the pair.
        outfile: Writable stream receiving the merged records.
        pair_1_file: Name of the first input, used in messages only.
        pair_2_file: Name of the second input, used in messages only.

    Returns:
        The number of line pairs that were processed.
    """
    counter = 0
    while True:
        line1 = pair_1.readline()
        line2 = pair_2.readline()
        if not (line1 and line2):
            break
        counter += 1

        if counter % _PROGRESS_INTERVAL == 0:
            sys.stdout.write('lines [ %s ] in %s and %s\n'
                             % (counter, pair_1_file, pair_2_file))

        position = counter % 4
        if position == 1:
            # Read id, taken from the first file only: drop the leading
            # marker character and the last two characters.
            # NOTE(review): presumably this removes '@' and a '/1' mate
            # suffix; ids without such a suffix lose two real characters.
            outfile.write(line1.strip()[1:-2] + '\t')
        elif position == 2:
            # Sequence lines of both mates.
            outfile.write(line1.strip() + '\t' + line2.strip() + '\t')
        elif position == 0:
            # Quality lines of both mates terminate the output record.
            outfile.write(line1.strip() + '\t' + line2.strip() + '\n')
        # position == 3 is the separator line, which is not copied.

    if line1 or line2:
        # Exactly one stream still had data: the inputs are not parallel.
        sys.stderr.write('WARNING: %s and %s differ in length - '
                         'merging stopped after %s lines\n'
                         % (pair_1_file, pair_2_file, counter))

    return counter


def main(argv=None):
    """Main function parsing the options and merging the paired files.

    Args:
        argv: Optional argument vector shaped like ``sys.argv``; defaults to
            ``sys.argv`` when omitted.

    Raises:
        ValueError: If the two comma separated file lists differ in length.
    """
    if argv is None:
        argv = sys.argv
    options = parse_options(argv)

    pair_1_list = options.pair_1.strip().split(',')
    pair_2_list = options.pair_2.strip().split(',')

    # Validate before touching the output file so a bad call does not
    # truncate an existing result; an assert would vanish under -O.
    if len(pair_1_list) != len(pair_2_list):
        raise ValueError(
            'number of first-in-pair files (%s) does not match number of '
            'second-in-pair files (%s)' % (len(pair_1_list), len(pair_2_list)))

    with open(options.outfile, 'w') as outfile:
        for pair_1_file, pair_2_file in zip(pair_1_list, pair_2_list):
            with open(pair_1_file, 'r') as pair_1:
                with open(pair_2_file, 'r') as pair_2:
                    _merge_pair(pair_1, pair_2, outfile,
                                pair_1_file, pair_2_file)


if __name__ == "__main__":
    main()