// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "genomemapper.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Forward declarations of functions that are defined further down in this file.
// get_slot:    computes the index slot of the seed starting at READ[pos].
// encode_read: normalises the bases of READ (see the notes at its definition).
// decode_read: translates READ_ENCODED codes 0..3 into the letters A, C, G, T.
int get_slot(int pos);
void encode_read();
char *decode_read(char *read);

int read_short_read()
{
	char line[10000];
	char *tmp;
	int linelen;

	if (fgets(line, 10000, QUERY_FP) == NULL) {
		if (READ_LENGTH == 0) fprintf(stderr, "\n!!! WARNING: Input read file '%s' is empty!\n\n", QUERY_FILE_NAME);
		return 1;
	}
	++linenr;
		
	if (strcspn(line, " \n\t") == 0) {
		do {
			if (fgets(line, 10000, QUERY_FP) == NULL) {
				if (READ_LENGTH == 0) fprintf(stderr, "\n!!! WARNING: Input read file '%s' is empty!\n\n", QUERY_FILE_NAME);  
				return 1;
			}
			++linenr;
		} while (strcspn(line, " \n\t") == 0);
	}
	
	linelen = strlen(line);
	if (linelen < 3) {
		fprintf(stderr, "ERROR: Unknown read input format! Do all the reads have an identifier?\n");
		exit(0);
	}
	
	if (line[0] == '@') {
		
		/////// FastQ input ///////
		
		// R E A D _ I D
		strncpy(READ_ID, line+1, strcspn(line, " \t\n")-1);
		
		do {
			if (fgets(line, 10000, QUERY_FP) == NULL) {
				fprintf(stderr, "ERROR: Read '%s' in line %lu is not complete in input query file '%s'! Missing read sequence and quality!\n", READ_ID, linenr, QUERY_FILE_NAME);
				exit(0);
			}
			++linenr;
		} while (strcspn(line, " \t\n") == 0);

		// R E A D
		strncpy(READ, line, strcspn(line, " \t\n"));
		if (strlen(READ) > MAX_READ_LENGTH) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is longer than the max read length (=%d)! It will be omitted!\n\n", READ_ID, linenr, MAX_READ_LENGTH);
			return -1;
		}
		else if (strlen(READ) == 0) {
			fprintf(stderr, "ERROR: Cannot find read sequence of read '%s' in line %lu in input query file '%s'!\n", READ_ID, linenr, QUERY_FILE_NAME);
			exit(0);
		}
		if (strcspn(READ, "aAcCgGtTnNrRyYmMkKwWsSbBdDhHvV") != 0) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu contains non-IUPAC characters! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}
		
		if (strlen(READ) < INDEX_DEPTH) { 
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is shorter than the specified seedlength! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}
		
		do {
			if (fgets(line, 10000, QUERY_FP) == NULL) {
				fprintf(stderr, "ERROR: Read '%s' in line %lu is not complete in input query file '%s'! Missing quality!\n", READ_ID, linenr, QUERY_FILE_NAME);
				exit(0);
			}
			++linenr;
		} while (strcspn(line, " \t\n") == 0);

		// +
		if (strlen(line) < 1 || line[0] != '+') {
			fprintf(stderr, "ERROR: Read '%s' in line %lu is not in fastq format!\n", READ_ID, linenr);
			exit(0);
		}
		
		do {
			if (fgets(line, 10000, QUERY_FP) == NULL) {
				fprintf(stderr, "ERROR: Read '%s' in line %lu is not complete in input query file '%s'! Missing quality!\n", READ_ID, linenr, QUERY_FILE_NAME);
				exit(0);
			}
			++linenr;
		} while (strcspn(line, " \t\n") == 0);
		
		// Q U A L I T Y
		if (strlen(line) > 0) strncpy(READ_QUALITY[0], line, strcspn(line, " \t\n"));
		else {
			fprintf(stderr, "ERROR: Cannot find read quality of read '%s' in line %lu in input query file '%s'!\n", READ_ID, linenr, QUERY_FILE_NAME);
			exit(0);
		}
		
		/*if (strlen(READ_QUALITY[0]) != READ_LENGTH) {
			fprintf(stderr, "ERROR: Read quality 1 of read '%s' in line %lu hasn't length of read!\n", READ_ID, linenr);
			exit(0);
		}*/
		
		READ_LENGTH = strlen(READ);
		
		// O T H E R
		READ_PE_FLAG = 0;
		READ_QUALITY[1] = "";
		READ_QUALITY[2] = "";
		
		READ_FORMAT = 0;
		
	}
	else if (line[0] == '>') {
		
		/////// Fasta input ///////
		
		strncpy(READ_ID, line+1, strcspn(line, " \t\n")-1);
		
		do {
			if (fgets(line, 10000, QUERY_FP) == NULL) {
				fprintf(stderr, "ERROR: Read '%s' in line %lu is not complete in input query file '%s'! Missing read sequence!\n", READ_ID, linenr, QUERY_FILE_NAME);
				exit(0);
			}
			++linenr;
		} while (strcspn(line, " \t\n") == 0);
		
		// R E A D
		strncpy(READ, line, strcspn(line, " \t\n"));
		if (strlen(READ) > MAX_READ_LENGTH) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is longer than the max read length (=%d)! It will be omitted!\n\n", READ_ID, linenr, MAX_READ_LENGTH);
			return -1;
		}
		else if (strlen(READ) == 0) {
			fprintf(stderr, "ERROR: Cannot find read sequence of read '%s' in line %lu in input query file '%s'!\n", READ_ID, linenr, QUERY_FILE_NAME);
			exit(0);
		}		
		if (strcspn(READ, "aAcCgGtTnNrRyYmMkKwWsSbBdDhHvV") != 0) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu contains non-IUPAC characters! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}
		
		if (strlen(READ) < INDEX_DEPTH) { 
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is shorter than the specified seedlength! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}
		
		READ_LENGTH = strlen(READ);

		READ_PE_FLAG = 0;
		READ_QUALITY[0] = "";
		READ_QUALITY[1] = "";
		READ_QUALITY[2] = "";
		
		READ_FORMAT = 1;
		
	}
	else {
		
		/////// Flatfile input ///////
		READ = strtok(&(line[0]), "\t");

		READ_ID = READ;
		if (strlen(READ_ID) == linelen) {
			fprintf(stderr, "ERROR: wrong read input data format, line %lu! Are columns tab-delimited?\n", linenr);
			exit(0);
		}
		if (READ_ID == NULL) {
			fprintf(stderr, "ERROR: Read ID is empty, line %lu!\n", linenr);
			exit(0);
		}
		
		//@TODO read is cut to MAX_READ_LENGTH (=1000) without notice! how to determine???
		READ = strtok('\0', "\t");
		
		if (strlen(READ) > MAX_READ_LENGTH) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is longer than the max read length (=%d)! It will be omitted!\n\n", READ_ID, linenr, MAX_READ_LENGTH);
			return -1;
		}
		if (READ == NULL) {
			fprintf(stderr, "ERROR: Read sequence is empty, line %lu!\n", linenr);
			exit(0);
		}   
		//printf("%s sp: %d\n",READ,(int) strcspn(READ, "A"));
		if (strcspn(READ, "aAcCgGtTnNrRyYmMkKwWsSbBdDhHvV") != 0) {
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu contains non-IUPAC characters! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}
			
		READ_LENGTH = strlen(READ);
		if (READ_LENGTH < INDEX_DEPTH) { 
			fprintf(stderr, "\n!!! WARNING: Read '%s' in line %lu is shorter than the specified seedlength! It will be omitted!\n\n", READ_ID, linenr);
			return -1;
		}

		tmp = strtok('\0', "\t");
		if (tmp == NULL) {
			fprintf(stderr, "ERROR: Paired-end flag is empty, line %lu!\n", linenr);
			exit(0);
		}	
		READ_PE_FLAG = atoi(tmp);
		
		READ_QUALITY[0] = strtok('\0', "\t");
		if (READ_QUALITY[0] == NULL) {
			fprintf(stderr, "ERROR: Read Quality 1 is empty, line %lu!\n", linenr);
			exit(0);
		}
		/*if (strlen(READ_QUALITY[0]) != READ_LENGTH) {
			fprintf(stderr, "ERROR: Read quality 1 hasn't length of read, line %lu!\n", linenr);
			exit(0);
		}*/
		
		READ_QUALITY[1] = strtok('\0', "\t");
		if (READ_QUALITY[1] == NULL) {
			fprintf(stderr, "ERROR: Read Quality 2 is empty, line %lu!\n", linenr);
			exit(0);
		}
		/*if (strlen(READ_QUALITY[1]) != READ_LENGTH) {
			fprintf(stderr, "ERROR: Read quality 2 hasn't length of read, line %lu!\n", linenr);
			exit(0);
		}*/
		
		READ_QUALITY[2] = strtok('\0', "\n");
		if (READ_QUALITY[2] == NULL) {
			fprintf(stderr, "ERROR: Read Quality 3 is empty, line %lu!\n", linenr);
			exit(0);
		}	
		/*if (strlen(READ_QUALITY[2]) != READ_LENGTH) {
			fprintf(stderr, "ERROR: Read quality 3 hasn't length of read, line %lu!\n", linenr);
			exit(0);
		}*/

		READ_FORMAT = 2;

	}

	// build READ_ENCODED:	
	//encode_read();

	return 0;
}

unsigned int map_fast()
{
	INDEX_ENTRY index_entry;
	unsigned int pos, i, j, k, p, chars, chr, block, chrom_overlap = 0, hits_reported = 0, strain;
	int slot, nr_mms, readpos, seqpos, readstart;
	int firstslot = 0;
	unsigned char position;
	char run, nr_runs, rev, nr_seeds = READ_LENGTH / INDEX_DEPTH, mm, perfect = 0, cancel, in_superblock;
	
	//only debugging
	char *seq = (char *) malloc(READ_LENGTH * 4 * sizeof(char));

	
	if (NUM_GAPS != 0) nr_runs = (nr_seeds > 1)? 2: 1;
	else {
		if (nr_seeds > NUM_MISMATCHES) nr_runs = NUM_MISMATCHES + 1;
			else nr_runs = nr_seeds;
	}
	
	char max_mms = nr_runs - 1;
	int mmpos[max_mms];
	
	for (run=1; run<=nr_runs; ++run) {
		
		if (nr_runs == 1) nr_runs = 0;	// a bit fishy, but nr_runs and run only have to be different, thats why this assignment is due
		if (run == nr_runs) readstart = READ_LENGTH - INDEX_DEPTH;
			else		 	readstart = (run-1) * INDEX_DEPTH;

		slot = get_slot(readstart);
		if (run == 1) firstslot = slot;
		
		if (slot >= 0) {	// tests if slot has an unallowed char!

			for (rev=0; rev <= MAP_REVERSE; ++rev) {
				if (!rev) index_entry = INDEX[slot];
					else  index_entry = INDEX_REV[slot];
					
				// for each mapping position
				for (i=0; i!=index_entry.num; ++i) {
					
					block = 0;
					position = 0;
					if (!rev) {
						memcpy(&block, &((index_entry.last_entry-(i+1))->id[0]), 3 * sizeof(char));
						memcpy(&position, &((index_entry.last_entry-(i+1))->id[3]), sizeof(unsigned char));
					}
					else {
						memcpy(&block, &((index_entry.last_entry-(index_entry.num-i))->id[0]), 3 * sizeof(char));
						memcpy(&position, &((index_entry.last_entry-(index_entry.num-i))->id[3]), sizeof(unsigned char));
					}
					pos = (unsigned int) position + BLOCK_TABLE[block].pos;	// 0-initialized
					chr = BLOCK_TABLE[block].chr;
					strain = BLOCK_TABLE[block].strain;
					in_superblock = (strain == 0) && BLOCK_TABLE[block].strainpos;
					STRAINPOS[strain] = (strain != 0)? BLOCK_TABLE[block].strainpos: BLOCK_TABLE[block].pos;
					STRAINPOS[strain] += position + 1;
					
					//////////////////////////////////////
					// get sequence(s) out of block table:
					//SEQ[strain][0] = '\0';
					//SEQ[strain][readstart] = '\0';

					//NUM_SEQS = 0;
					//SEQ[strain][0] = '\0';
					BRANCHES = 0;

					// stores all sequences to which mapping has to occur in SEQ and the corresponding strains in SEQS!
					if (rev) {
						/*SEQ[strain][READ_LENGTH - readstart - INDEX_DEPTH] = '\0';
						if (get_readseq_left(block, position, strain, READ_LENGTH - readstart - INDEX_DEPTH, 0) == 0) {
							SEQS[NUM_SEQS] = strain;
							NUM_SEQS++;
							SEQ[strain][READ_LENGTH - readstart - INDEX_DEPTH] = '\0';
						}*/
						SEQS[strain] = get_genome_pos_left(block, position, strain, READ_LENGTH - readstart - INDEX_DEPTH, in_superblock);
					}
					else {
						SEQS[strain] = get_genome_pos_left(block, position, strain, readstart, in_superblock);
						
						/*SEQ[strain][readstart] = '\0';
						if (get_readseq_left(block, position, strain, readstart, 0) == 0) {
							SEQS[NUM_SEQS] = strain;
							NUM_SEQS++;
							SEQ[strain][readstart] = '\0';
						}*/
					}

					if (SEQS[strain] == 0) {
						if (rev) {
							STRAINPOS[strain] = STRAINPOS[strain] - (READ_LENGTH - readstart - INDEX_DEPTH);
						}
						else {
							STRAINPOS[strain] = STRAINPOS[strain] - readstart;
						}
						get_readseq_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, READ_LENGTH, in_superblock, 0);
					}
					else {
						SEQ[strain][0] = '\0';
						SEQS[strain] = -1;
					}
					
					
					//////////////////////////////////////////
					//////////////////////////////////////////
					/*if (rev) get_readseq_right(block, position, strain, readstart + INDEX_DEPTH, 0);
					else	 get_readseq_right(block, position, strain, READ_LENGTH - readstart, 0);
					SEQ[strain][READ_LENGTH] = '\0';

					//////////////////////////////////////////
					//////////////////////////////////////////
					
					while (p > 0) {
						
						if (strlen(BLOCK_TABLE[block].seq) - position < p) {
							strcpy(SEQ[strain]+strlen(SEQ[strain]), BLOCK_TABLE[block].seq + position);
							p -= strlen(BLOCK_TABLE[block].seq) + position;
							
							if (strain == 0 && BLOCK_TABLE[block].next_block != 0) {
								// create new seqs:
								block = BLOCK_TABLE[block].next_block;
								strcpy(SEQ[BLOCK_TABLE[block].strain], SEQ[strain]);
							}
							
							block = BLOCK_TABLE[block].next_block;
						} else {
							strncpy(SEQ[strain]+strlen(SEQ[strain]), BLOCK_TABLE[block].seq + position, p); 
							p = 0;
						}
						
					}*/
					//////////////////////////////////////
					
					
					/*
					if (!rev) {
						chrstart = pos - (run!=nr_runs) * (run-1) * INDEX_DEPTH - (run==nr_runs) * (READ_LENGTH - INDEX_DEPTH);
					}	// 0-initialized
					else {
						chrstart = pos + (run!=nr_runs) *   run   * INDEX_DEPTH + (run==nr_runs) * READ_LENGTH - 1; 
					}
					
					
					// check if read can map on position in genome:
					if ( (!rev && chrstart < 0) ||
						  (rev && chrstart < READ_LENGTH - 1) ) {
						if (STATISTICS) chrom_overlap++;
					}
					else if ( (!rev && chrstart + READ_LENGTH > CHR_LENGTH[chr]) ||
							  ( rev && chrstart > CHR_LENGTH[chr] - 1) ) {
						if (STATISTICS) chrom_overlap++;
					}
					else {*/
					
					
					//for (k=0; k!=NUM_SEQS; ++k) {
					for (k=0; k<=NUM_STRAINS; ++k) {
					
						if (BRANCHES) strain = k;
						
						if (SEQS[strain] != 0) {
							SEQ[strain][0] = '\0';
							SEQS[strain] = -1;
							continue;
						}
					
						/*if (SEQ_OVERHANG[SEQS[k]] > 0) {
							SEQ[SEQS[k]][0] = '\0';
							continue;
						}*/

						/*if (strlen(SEQ[SEQS[k]]) != READ_LENGTH) { // if left superblock has strain seq, but not right superblock -> fill up to the right
							if (rev) get_readseq_right(block, position, SEQS[k], readstart + INDEX_DEPTH, 1);
							else get_readseq_right(block, position, SEQS[k], READ_LENGTH - readstart, 1);
							SEQ[SEQS[k]][READ_LENGTH] = '\0';
						}*/
						
						if (strlen(SEQ[strain]) == 0) { // if left superblock has strain seq, but not right superblock -> fill up to the right
							if (rev) SEQS[strain] = get_readseq_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, READ_LENGTH, 1, 0);
							else SEQS[strain] = get_readseq_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, READ_LENGTH, 1, 0);
							SEQ[strain][READ_LENGTH] = '\0';
							
							if (SEQS[strain] != 0) {
								SEQ[strain][0] = '\0';
								SEQS[strain] = -1;
								continue;
							}
						}
						decode_strainseq(seq, SEQ[strain]);
						strcpy(SEQ[strain], seq);
						
						nr_mms = 0;
						chars = 0;
						cancel = 0;
						
						readpos = 0;
						//chrpos = chrstart;
						seqpos = rev? READ_LENGTH-1: 0;
						
						for (j=1; j!=run; ++j) {
							
							mm = 0;
							
							for (p=0; p!=INDEX_DEPTH; ++p) {
								
								READ[readpos+p] = toupper(READ[readpos+p]);
								
								if ( ( rev && get_compl_base(SEQ[strain][seqpos-p]) != READ[readpos+p]) ||  
									 (!rev &&                SEQ[strain][seqpos+p]  != READ[readpos+p]) ||
									  !(unique_base(READ[readpos+p])) ) {
									  		  	
										if (nr_mms < max_mms) {
											mmpos[nr_mms] = readpos + p + 1;
											++nr_mms;
										}
										else {
											cancel = 1;
											break;
										}
										++mm;
																				
								}
								
								++chars;
								
							}
								
							if (!mm) {
								cancel = 1;
								break;
							}
						
							if (cancel) break;
							
							//chrpos += INDEX_DEPTH * (rev? -1: 1);
							seqpos += INDEX_DEPTH * (rev? -1: 1);
							readpos += INDEX_DEPTH;
															
						}
		
						
						//chrpos  = chrpos  + (run!=nr_runs) * INDEX_DEPTH * (rev? -1: 1);
						seqpos  = seqpos  + (run!=nr_runs) * INDEX_DEPTH * (rev? -1: 1);
						readpos = readpos + (run!=nr_runs) * INDEX_DEPTH;
						while (!cancel && chars != READ_LENGTH - INDEX_DEPTH) {
							
							READ[readpos] = toupper(READ[readpos]);
							if ( ( rev && get_compl_base(SEQ[strain][seqpos]) != READ[readpos]) ||
								 (!rev && 				 SEQ[strain][seqpos]  != READ[readpos]) ||
								  !(unique_base(READ[readpos])) ) {
								  	
									if (nr_mms == max_mms) {
										cancel = 1;
										break;
									}
									mmpos[nr_mms] = readpos + 1;
									++nr_mms;
																		
							}
							
							readpos++;
							//chrpos += rev? -1: 1;
							seqpos += rev? -1: 1;
							chars++;
									
						}
							
						
						if ( !cancel && nr_mms <= max_mms ) {
							// create hit
							HIT* hit = alloc_hit();
							//hit->chromosome = chr;
							hit->readpos = 1;
							
							if (rev) hit->chromosome = -chr-1;
							else 	 hit->chromosome = chr+1;
							
							hit->strain = strain;							
							hit->startblock = STARTBLOCK[strain];
							hit->blockoffset = BLOCKOFFSET[strain];
							hit->length = READ_LENGTH;
							hit->strainpos = STRAINPOS[strain]; //hit->blockoffset + (hit->strain != 0)? BLOCK_TABLE[hit->startblock].strainpos: BLOCK_TABLE[hit->startblock].pos;
							//hit->end = (hit->strain != 0)? BLOCK_TABLE[hit->startblock].strainpos: BLOCK_TABLE[hit->startblock].pos;
							//hit->end += hit->startoffset + READ_LENGTH;//ENDSTRAINPOS[strain]; 
							
							mm = 0;
							// create possible mismatches
							for (j=0; j!=nr_mms; ++j) {
								hit->edit_op[j].mm = 1;
								if (hit->chromosome > 0) hit->edit_op[j].pos = mmpos[j];
									else				 hit->edit_op[j].pos = READ_LENGTH - mmpos[j] + 1;
								hit->mismatches++;
								mm = 1;
							}
							
							if (!ALL_HIT_STRATEGY && nr_mms < max_mms) max_mms = nr_mms;
							
							// perfect matching read
							if (STATISTICS) {
								if (!mm) {
									if (rev) PERFECT_HITS_REV++;
										else PERFECT_HITS++;
									if (!perfect) PERFECT_READS++;
									
									perfect = 1;
								}
								else NOT_ALIGNED[1]++;
							}
							
							insert_into_scorelist(hit, 0);
							
							if (STATISTICS)	HITS_LEN[READ_LENGTH]++;
					
							hits_reported++;
							
						} // end of create hit
						
						
						SEQ[strain][0] = '\0';
						SEQS[strain] = -1;
						if (!BRANCHES) break;						
					} // end of for each strain
					// end of no hit-overlap with chrom border
					
				} // end of for each mapping pos
				
			} // end of forward/reverse	rev
			
		} // end of slot != -1
			
	} // end of runs = different slots
	
	if (!ALL_HIT_STRATEGY && !hits_reported) {	//if best hit strategy, but no mappings found -> prepare for complete mapping! 
		ALL_HIT_STRATEGY = -1;
	} else {
		if (STATISTICS) {
			NUM_HITS += hits_reported;
			HITS_PER_READ += hits_reported;
			ENDSTART_MAPPED[0] += chrom_overlap;
		}
	}
	
	free(seq);

	
	
	return firstslot;
}


// Appends len characters of block sequence to SEQ[strain], starting at offset
// blockpos of block and continuing through the following blocks.
//
//   block, blockpos  where to start reading in BLOCK_TABLE
//   strain           strain whose sequence is collected; selects SEQ[strain]
//   len              number of characters still to be appended
//   stick_to_strain  if 0 (and strain is 0 and the start block is not in a
//                    superblock), the sequence collected so far is copied to
//                    every strain that branches off and that has a negative
//                    SEQS entry, and the function recurses for that strain
//   overlap          only used for the STRAINPOS of such a branching strain
//
// A negative indel_offset of a block shortens the part of its sequence that
// is used by (indel_offset + 1) characters.
//
// Returns 0 if all characters were appended, otherwise the number of missing
// characters when the last block of the chain (next_block == 0) was reached.
//
// Compared to the original code, the scratch buffer that was allocated and
// freed on every call (but never read) and two unused locals were removed.
int get_readseq_right(unsigned int block, unsigned int blockpos, unsigned int strain, unsigned int len, char stick_to_strain, char overlap)
{
	unsigned int nextblock, nextnextblock, l;
	int indel_offset;
	char in_superblock = (strain == 0) && BLOCK_TABLE[block].strainpos;

	while (len != 0) {

		if (BLOCK_TABLE[block].indel_offset < 0) indel_offset = BLOCK_TABLE[block].indel_offset + 1;
		else indel_offset = 0;

		unsigned int block_seq_len = strlen(BLOCK_TABLE[block].seq);
		l = strlen(SEQ[strain]);

		// block does not contribute any character -> go on with the next one
		if (block_seq_len + indel_offset == 0) {
			block = BLOCK_TABLE[block].next_block;
			continue;
		}

		if (block_seq_len + indel_offset - blockpos >= len) {
			// the rest of the requested sequence lies within this block
			memcpy(SEQ[strain]+l, BLOCK_TABLE[block].seq + blockpos, len);
			SEQ[strain][l+len] = '\0';

			len = 0;
		}
		else {
			// take everything that is left in this block and continue in the next one
			len = len - block_seq_len - indel_offset + blockpos;

			memcpy(SEQ[strain]+l, BLOCK_TABLE[block].seq + blockpos, block_seq_len + indel_offset - blockpos);
			SEQ[strain][l + block_seq_len + indel_offset - blockpos] = '\0';

			nextblock = BLOCK_TABLE[block].next_block;
			if (nextblock == 0) {
				return len;	// block is last block of chromosome -> read could not have been mapped!!
			}

			nextnextblock = BLOCK_TABLE[nextblock].next_strain_front;

			if (strain == 0) {
				if (!stick_to_strain && !in_superblock) {	// if refblock is not in a superblock or seq only on specified strain is demanded
					while (nextnextblock != 0) {
						if (SEQS[BLOCK_TABLE[nextnextblock].strain] < 0) {
							// branch: the new strain starts with the sequence collected so far
							strcpy(SEQ[BLOCK_TABLE[nextnextblock].strain], SEQ[0]);

							SEQS[BLOCK_TABLE[nextnextblock].strain] = get_readseq_right(nextnextblock, 0, BLOCK_TABLE[nextnextblock].strain, len, 1, overlap); // recursion with new strain
							BRANCHES = 1;

							STARTBLOCK[BLOCK_TABLE[nextnextblock].strain] = STARTBLOCK[0];
							BLOCKOFFSET[BLOCK_TABLE[nextnextblock].strain] = BLOCKOFFSET[0];
							STRAINPOS[BLOCK_TABLE[nextnextblock].strain] = BLOCK_TABLE[nextnextblock].strainpos - strlen(SEQ[0]) + (overlap * (NUM_GAPS - SEQS[0])) + 1;
						}

						nextnextblock = BLOCK_TABLE[nextnextblock].next_strain_front;
					}
				}
				block = nextblock;
			}
			else {
				if (BLOCK_TABLE[nextblock].strain == 0 && nextnextblock != 0) {
					// the next block belongs to the reference: search the chain
					// of strain blocks hanging off it for a block of our strain,
					// otherwise continue on the reference block
					block = nextblock;
					nextblock = nextnextblock;
					while (BLOCK_TABLE[nextblock].strain != strain && nextblock != 0) {
						nextblock = BLOCK_TABLE[nextblock].next_strain_front;
					}
					if (BLOCK_TABLE[nextblock].strain == strain) block = nextblock;
				}
				else {
					block = nextblock;
				}
			}

		}

		// every block after the first one is read from its beginning
		blockpos = 0;

	} // while len != 0

	return len;
}




// Returns non-zero if base is one of the four unambiguous nucleotides.
static int msr_is_acgt(char base)
{
	return base == 'A' || base == 'T' || base == 'C' || base == 'G';
}

// Looks up slot in the forward index (and in the reverse index if
// MAP_REVERSE is set) and passes the seed to seed2genome if it occurs in at
// least one of them. The flag handed over is 1 (forward index only),
// 2 (reverse index only) or 4 (both). readpos is 0-based; seed2genome gets
// the 1-based position.
// Returns 0 if seed2genome returned 0, otherwise 1.
static int msr_process_seed(unsigned int num, unsigned int slot, unsigned int readpos)
{
	char strands = 0;

	if (INDEX[slot].last_entry != NULL)
		strands = 1;
	if (MAP_REVERSE && INDEX_REV[slot].last_entry != NULL)
		strands = (strands + strands) + 2;

	if (strands > 0 && !seed2genome(num, slot, readpos + 1, strands))
		return 0;

	return 1;
}

// Slides a window of INDEX_DEPTH bases over READ and hands every seed that
// consists of A, C, G and T only to seed2genome. READ is upper-cased in
// place while it is scanned.
//
//   num         passed through to seed2genome
//   first_slot  slot of the first seed as computed before:
//               < 0  the first seed contains an unallowed character; scanning
//                    starts at position -first_slot
//               otherwise it is only used if ALL_HIT_STRATEGY is 0; then the
//                    first seed is processed directly with this slot
//
// Returns 0 as soon as seed2genome returns 0, otherwise 1.
int map_short_read(unsigned int num, int first_slot)
{
	unsigned int seed_start = 0;	// 0-based start of the current seed window
	unsigned int cursor;		// position of the base that is examined next
	unsigned int slot = 0;

	if (first_slot < 0) {
		// first slot has an unallowed char!
		cursor = -first_slot;
		seed_start = -first_slot;
		HAS_SLOT = 0;
	}
	else if (ALL_HIT_STRATEGY) {
		// first slot hasn't been computed yet
		cursor = 0;
		HAS_SLOT = 0;
	}
	else {
		// first slot has already been computed in map_fast and doesn't contain unallowed chars
		slot = first_slot;

		if (!msr_process_seed(num, slot, seed_start)) return 0;

		seed_start++;
		cursor = INDEX_DEPTH;
		SLOT = first_slot;
		HAS_SLOT = 1;
	}

	while (cursor < READ_LENGTH) {
		// evaluated before cursor moves: is the window still being filled?
		const int filling = cursor < seed_start + INDEX_DEPTH - 1;

		READ[cursor] = toupper(READ[cursor]);

		if (!msr_is_acgt(READ[cursor])) {
			// restart the window right behind the unallowed character
			cursor++;
			seed_start = cursor;
			HAS_SLOT = 0;
			continue;
		}

		if (!filling) {
			// the window is complete: look up the seed and shift the window by one
			slot = get_slot(seed_start);

			if (!msr_process_seed(num, slot, seed_start)) return 0;

			seed_start++;
			HAS_SLOT = 1;
		}

		cursor++;
	}

	HAS_SLOT = 0;

	return 1;
}



// Maps an upper-case base to its 2-bit code (A=0, C=1, G=2, T=3).
// Returns -1 for every other character.
static int gs_base_code(char base)
{
	switch (base) {
		case 'A': return 0;
		case 'C': return 1;
		case 'G': return 2;
		case 'T': return 3;
		default:  return -1;
	}
}

// Computes the index slot of the seed of INDEX_DEPTH bases that starts at
// READ[pos]; the bases looked at are upper-cased in place.
//
// HAS_SLOT == 0: the slot is built from scratch as the sum of
//                POWER[i] * code over all bases of the seed. If base i is not
//                A, C, G or T, -pos-i-1 is returned (SLOT stays untouched).
// HAS_SLOT != 0: the slot stored in SLOT is shifted right by two bits and
//                BINARY_CODE of the last base of the seed is OR-ed in. If
//                that base is not A, C, G or T, -1 is returned.
//
// On success the slot is stored in SLOT and returned.
int get_slot(int pos)
{
	unsigned int slot = 0;
	unsigned int i;
	int code;

	if (HAS_SLOT == 0) {
		for (i = 0; i < INDEX_DEPTH; i++) {
			READ[pos+i] = toupper(READ[pos+i]);

			code = gs_base_code(READ[pos+i]);
			if (code < 0) return -pos-i-1;

			slot = slot + POWER[i] * code;
		}
	}
	else {
		// reuse the previous slot: drop its first base, append the new last base
		slot = SLOT;
		slot >>= 2;

		READ[pos+INDEX_DEPTH-1] = toupper(READ[pos+INDEX_DEPTH-1]);

		code = gs_base_code(READ[pos+INDEX_DEPTH-1]);
		if (code < 0) return -1;

		slot = slot | BINARY_CODE[code];
	}

	SLOT = slot;

	return(slot);
}

// Normalises READ in place: a/c/g/t are replaced by their upper-case letters
// (the original code wrote the ASCII codes 65, 67, 71 and 84). Any other
// character is reported and terminates the program, now with a non-zero exit
// status.
// NOTE(review): only the terminator of READ_ENCODED is written here, its
// content is never filled, although decode_read() expects the codes 0..3 in
// it. The only call visible in this file is commented out -- confirm the
// intended encoding before this function is used again.
void encode_read()
{
	int i;
	for (i=0; i!=READ_LENGTH; ++i) {
		switch (READ[i]) {
			case 'A': case 'a': READ[i] = 'A'; break;
			case 'C': case 'c': READ[i] = 'C'; break;
			case 'G': case 'g': READ[i] = 'G'; break;
			case 'T': case 't': READ[i] = 'T'; break;
			default: fprintf(stderr, "ERROR: Unallowed character found in read '%s'!\n", READ_ID);
					 exit(EXIT_FAILURE);
		}
	}
	READ_ENCODED[READ_LENGTH] = '\0';
}

// TODO not necessary anymore
// Translates the first READ_LENGTH codes of READ_ENCODED (0, 1, 2, 3) into
// the letters A, C, G, T, stores them in read and NUL-terminates the result.
// Codes outside 0..3 are written as 'N'; the original code left these bytes
// of read uninitialised.
// read must provide at least READ_LENGTH + 1 bytes. Returns read.
char *decode_read(char *read)
{
	static const char letters[] = "ACGT";
	int i;

	for (i=0; i!=READ_LENGTH; ++i) {
		int code = READ_ENCODED[i];
		read[i] = (code >= 0 && code <= 3)? letters[code]: 'N';
	}
	read[READ_LENGTH] = '\0';
	return read;
}
