// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008/09 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "genomemapper.h"

// Forward declarations of functions that are defined further down in this file.

// Re-links a hit into the hit list that corresponds to its current length.
int size_hit(HIT *hit, unsigned int *oldlength);
// Walks the per-length hit lists of the current read; returns non-zero if at least one hit was reported.
int browse_hits();
// Returns 1 if an equivalent hit is already registered at the hit's genome slot, 0 otherwise.
int check_duplicate(HIT* hit);
// Debug dump of all occupied GENOME slots (only defined when OLD != 1).
void printgenome();
// Returns the mapping entry of the chromosome director for the given position/chromosome/strain, creating it if needed.
MAPPING_ENTRY *create_mapping_entry(int *genome_pos, int *genome_chr, unsigned short int *strain, char num);

// Main mapping loop: fetches reads one at a time via read_short_read() until it
// reports end of input, maps each read and prints its hits.
//
// Per read:
//   1) map_fast() is run for the best-hit strategy, or when only ungapped hits
//      with fewer mismatches than seeds are requested.
//   2) map_short_read() + browse_hits() are run for the all-hit strategy, or when
//      map_fast() set ALL_HIT_STRATEGY to -1 because it found nothing, unless the
//      ungapped/few-mismatch case was already covered by map_fast().
//   3) Hits are printed (or the read goes to the leftover file) and all
//      per-read containers are reset.
//
// Side effects: closes QUERY_FP and, if configured, OUT_FP and LEFTOVER_FP;
// sets NUM_READS and increments READS_MAPPED.
// Returns 0.
//
// NOTE(review): first_slot keeps the value of an earlier read whenever map_fast()
// is skipped for the current one -- confirm that map_short_read() tolerates this.
int map_reads()
{
	char eof = 0, read_mapped;
	unsigned int count_reads = 0;
	int first_slot = 0;
	int num_edit_ops = NUM_EDIT_OPS;	// configured value, restored after every read

	while (!eof) {

		count_reads++;

		LONGEST_HIT = 0;

		eof = read_short_read();
		if (eof != 0) continue;

		if (READ_LENGTH < HITLEN_LIMIT) {
			// count_reads is unsigned, hence %u
			fprintf(stderr, "\n!!! WARNING! Read %u (%s) with length %d is shorter than the hitlength limit (=%d) and will not be processed!\n\n",
				count_reads, READ_ID, READ_LENGTH, HITLEN_LIMIT);
		}
		else {

			// progress output, just for user convenience
			if (VERBOSE && (count_reads % 10000 == 0)) {
				printf(".");
				fflush(stdout);
			}

			HITS_IN_SCORE_LIST = 0;

			// map_fast IF 1) best hit strategy 2) only hits up to RL/ID mismatches without gaps should be found
			// READ_LENGTH / INDEX_DEPTH is the number of seeds fitting in the current read
			int nr_seeds = (int) (READ_LENGTH / INDEX_DEPTH);
			if (!ALL_HIT_STRATEGY || (NUM_MISMATCHES < nr_seeds && NUM_GAPS == 0)) {
				// if no hits could be found, map_fast sets ALL_HIT_STRATEGY = -1,
				// which forces seed&extend below
				first_slot = map_fast();
			}

			// map_complete IF 1) all hit strategy 2) best hit strategy and no mappings found in map_fast
			// BUT NOT IF MM < RL/ID AND gaps=0 (since map_fast has already found them)
			if (ALL_HIT_STRATEGY != 0 && !(NUM_MISMATCHES < nr_seeds && NUM_GAPS == 0)) {

				map_short_read(count_reads, first_slot);

				// for removing duplicates:
				dealloc_mapping_entries();
				CHROMOSOME_ENTRY_OPERATOR->used = 0;

				browse_hits();
			}

			// undo the temporary -1 that map_fast may have set
			if (ALL_HIT_STRATEGY < 0) ALL_HIT_STRATEGY = 0;

			// NUM_EDIT_OPS may have been lowered during alignment / map_fast
			NUM_EDIT_OPS = num_edit_ops;

			read_mapped = print_hits();	// returns 1 if at least one hit is printed, 0 otherwise

			if (read_mapped) READS_MAPPED++;
			else {
				if (strlen(LEFTOVER_FILE_NAME) > 0) print_leftovers();
			}

			// Strictly only the container counter would have to be reset here... optimization?
			dealloc_mapping_entries();
			dealloc_hits();
			dealloc_hits_by_score();
			CHROMOSOME_ENTRY_OPERATOR->used = 0;
			if (LONGEST_HIT != 0) dealloc_hit_lists_operator();

		}

	}

	fclose(QUERY_FP);

	if (strlen(OUT_FILE_NAME) > 0) fclose(OUT_FP);
	if (strlen(LEFTOVER_FILE_NAME) > 0) fclose(LEFTOVER_FP);

	// the loop counted one extra iteration for the read attempt that hit EOF
	NUM_READS = count_reads - 1;

	if (VERBOSE) printf("\n");

	return(0);
}


// Registers all genome occurrences of one seed of the current read.
//
// For every genome position stored in index slot `index_slot` the function
// either extends a hit that already exists for the current read (directly
// adjacent neighbor, or a neighbor INDEX_DEPTH+1 positions away, which spans
// a single mismatch) or allocates a new hit of length INDEX_DEPTH.
//
// Parameters:
//   num        - number of the read; only used to gate debug output (see read_num)
//   index_slot - slot in INDEX / INDEX_REV holding the seed's genome positions
//   readpos    - position of the seed within the read (1 is treated as the first position)
//   reverse    - 1: only INDEX, 2: only INDEX_REV, 4: both (INDEX first, then INDEX_REV)
//
// Always returns 1.
//
// NOTE(review): create_mapping_entry() returns 0 when alloc_chromosome_entry()
// fails; its result is dereferenced below without a check.
int seed2genome(unsigned int num, unsigned int index_slot, unsigned int readpos, char reverse)
{
	INDEX_ENTRY index_entry;
	int genome_pos, neighbor_pos, genome_chr;
	unsigned int block;
	unsigned short int strain, neighbor_strain;
	unsigned char pos;
	int read_num = DEBUG? num: -1;	// equals num only in DEBUG mode, so (read_num == num) gates debug output
	int c;	// NOTE(review): assigned below but never read

	char extended = 0;

	CHROMOSOME_ENTRY *chromosome_director_neighbor;
	MAPPING_ENTRY *mapping_entry, *neighbor;

	HIT *hit = NULL;

	unsigned int i,k;
	unsigned int oldlength;
	
	// reverse = 1: only index, 2: only index_rev, 4: both
	while (reverse > 0) {
		
		if (reverse != 2) index_entry = *(INDEX+index_slot);
		else 		  index_entry = *(INDEX_REV+index_slot);

//if (read_num == num) printf("###############################################\n");
//if (read_num == num) printf("Add seed to genomepositions from slot # %d (%s) containing %d genomepositions\n", index_slot, get_seq(index_slot), index_entry.num);
	
		for (i=0; i<index_entry.num; i++) { // foreach seed...
		
			extended = 0;
		
//if (read_num == num) printf("############################\n");
//if (read_num == num) printf("Now adding seed # %d/%d of read %i (%s), slot %i, ori %d readpos %d\n", i+1, index_entry.num, num, READ_ID, index_slot, reverse, readpos);

			// Get current position in the chromosome
			// (an index id consists of 3 bytes block number followed by 1 byte offset within the block;
			//  the forward index is walked from its last entry backwards, the reverse index front to back)
			if (reverse != 2) {
				block = 0;
				memcpy(&block, &((index_entry.last_entry-(i+1))->id[0]), 3 * sizeof(char));
				pos = 0;
				memcpy(&pos, &((index_entry.last_entry-(i+1))->id[3]), sizeof(unsigned char));
				strain = BLOCK_TABLE[block].strain;
				genome_pos = (strain == 0)? pos + BLOCK_TABLE[block].pos : pos + BLOCK_TABLE[block].strainpos;
				genome_chr = BLOCK_TABLE[block].chr+1;
			}
			else {
				block = 0;
				memcpy(&block, &((index_entry.last_entry-(index_entry.num-i))->id[0]), 3 * sizeof(char));
				pos = 0;
				memcpy(&pos, &((index_entry.last_entry-(index_entry.num-i))->id[3]), sizeof(unsigned char));
				strain = BLOCK_TABLE[block].strain;
				genome_pos = (strain == 0)? pos + BLOCK_TABLE[block].pos : pos + BLOCK_TABLE[block].strainpos;
				// reverse-strand hits carry a negative chromosome number
				genome_chr = -BLOCK_TABLE[block].chr-1;
			}
			
//if (read_num == num) printf("block %d pos %d [block].pos %d genome_pos %d genome_chr %d strain %d\n",block, pos, BLOCK_TABLE[block].pos, genome_pos, genome_chr, strain);


			// HIT EXTENSION
			
			//Check left (for plus strand) and right (for minus strand) neighbor at the genomeposition of the hit if there is a hit to join.
			if (genome_pos > 0 && readpos > 1) {
//if (read_num == num) printf("Now checking if left neighbor exists and is willing to join (genome_pos %i)\n",genome_pos);
				
				BRANCHES = 0;
				
				// get correct left or right genome_pos according to graph structure, fill SEQS and set NUM_SEQS
				if (genome_chr > 0) {
					if (pos == 0) SEQS[strain] = get_genome_pos_left(block, pos, strain, 1, 0);
					else {
						STARTBLOCK[strain] = block;
						BLOCKOFFSET[strain] = pos-1;
						SEQS[strain] = 0;
					}
				}
				else {
					if (pos == strlen(BLOCK_TABLE[block].seq)-1) SEQS[strain] = get_genome_pos_right(block, pos, strain, 1, 0);
					else {
						STARTBLOCK[strain] = block;
						BLOCKOFFSET[strain] = pos+1;
						SEQS[strain] = 0;
					}
				}
				
				// Without branching only the seed's own strain is inspected (the loop breaks after
				// one processed pass); otherwise every strain k with SEQS[k] == 0 is inspected.
				for (k=0; k<=NUM_STRAINS; ++k) {
					
					if (BRANCHES == 0) {
						neighbor_strain = strain;
					}
					else {
						neighbor_strain = k;
					}
//if (read_num == num) printf("neighbor_Strain: %d startblock %d startoffset %d\n",neighbor_strain, STARTBLOCK[neighbor_strain], BLOCKOFFSET[neighbor_strain]);
					
					
					// NOTE(review): this tests SEQS[k], whereas the mismatch-extension loop
					// further down tests SEQS[neighbor_strain] -- confirm which one is intended.
					if (SEQS[k] != 0) continue;
					
					if (BLOCK_TABLE[STARTBLOCK[neighbor_strain]].strain == 0)
						neighbor_pos = BLOCK_TABLE[STARTBLOCK[neighbor_strain]].pos + BLOCKOFFSET[neighbor_strain];
					else
						neighbor_pos = BLOCK_TABLE[STARTBLOCK[neighbor_strain]].strainpos + BLOCKOFFSET[neighbor_strain];
				
					
					if (*(GENOME + neighbor_pos) != NULL) { // Is there a chromosome director?
//if (read_num == num) printf("  Found a neighbored chromosome director at neighbor_pos %d\n", neighbor_pos);
						chromosome_director_neighbor = *(GENOME + neighbor_pos);
	
						// Is the chrom director from actual read?
						// (it must lie in the used part of the entry container and point back to this position)
						if (chromosome_director_neighbor < (CHROMOSOME_ENTRY_OPERATOR->entries+CHROMOSOME_ENTRY_OPERATOR->used) &&
						   (chromosome_director_neighbor->genome_pos == neighbor_pos)) {

							c = 0;
							while (chromosome_director_neighbor != NULL) {


								// skip directors of another chromosome/strand
								if (chromosome_director_neighbor->chromosome != genome_chr) {
									chromosome_director_neighbor = chromosome_director_neighbor->next;
									continue;
								}

								// if we are on Ref and not in a superblock, try to extend all chrom.directors of neighbor slot!
								if (chromosome_director_neighbor->strain != neighbor_strain) {
									chromosome_director_neighbor = chromosome_director_neighbor->next;
									continue;
								}


//if (read_num == num) printf("  Found neighboured mapping entry list\n");
			
								neighbor = chromosome_director_neighbor->mapping_entries;
			
//if (read_num == num) { printf("  Neighbor readpos: %d  actual readpos %d\n", neighbor->readpos, readpos); printf("  neighbor hit: "); if (neighbor->hit!=NULL) printhit(neighbor->hit); else printf("null\n"); }
				
									if (neighbor->readpos == readpos-1) { // is the neighbored mapping entry also neighbored in the read?
										
										hit = neighbor->hit;
//if (read_num==num) { printf("drin!!!  "); printhit(hit); }
										if (hit != NULL) {
											oldlength = hit->length;
					
											// 1) update neighbor hit
											// (on the reverse index the hit grows towards its start, so the start moves)
											if (reverse == 2) {
												hit->startblock = block;
												hit->blockoffset = pos;
												hit->strainpos--;
											}
											hit->length++;
											// debug output, active only in DEBUG mode
											if (read_num == num) printhit(hit);
					
											// 2) create own mapping entry
											mapping_entry = create_mapping_entry(&genome_pos, &genome_chr, &(hit->strain), (read_num == num));
											mapping_entry->readpos = readpos;
											mapping_entry->hit = hit;
											
											// move the hit into the list for its new length
											size_hit(hit, &oldlength);
					
											if (hit->strain == strain) {
												extended = 1;
											}
										}
									}
								
								chromosome_director_neighbor = chromosome_director_neighbor->next;
							} // for all chrom.directors
						}
					}
					
					SEQS[neighbor_strain] = -1;
					if (BRANCHES == 0) {
						
						break;
					}

				} // foreach neighbor position on all strains
				
//if (read_num == num) { printf("Will the hit be combined with adjacent neighbor? %d\n", extended); }
				

				// MISMATCH extension - only on reference (for now?) 
				
				//combine with possible hit at position seedlength+1 to the left(+) or right(-) to span hit over mismatch
				if (strain == 0 && !extended && readpos > INDEX_DEPTH && NUM_MISMATCHES != 0) {
//if (read_num == num) printf("Now checking if hit can be extended over mismatch\n");
					
					BRANCHES = 0;
					
					if (reverse != 2) {
						if (INDEX_DEPTH + 1 > pos) SEQS[strain] = get_genome_pos_left(block, pos, strain, INDEX_DEPTH + 1, 1);
						else {
							STARTBLOCK[strain] = block;
							BLOCKOFFSET[strain] = pos - INDEX_DEPTH - 1;
							SEQS[strain] = 0;
						}
					}
					else {
						if (pos + INDEX_DEPTH + 1 >= strlen(BLOCK_TABLE[block].seq)) SEQS[strain] = get_genome_pos_right(block, pos, strain, INDEX_DEPTH + 1, 1);
						else {
							STARTBLOCK[strain] = block;
							BLOCKOFFSET[strain] = pos + INDEX_DEPTH + 1;
							SEQS[strain] = 0;
						}
					}
					
					for (k=0; k<=NUM_STRAINS; ++k) {
						
						if (BRANCHES == 0) {
							neighbor_strain = strain;
						}
						else {
							neighbor_strain = k;
						}
						
						if (SEQS[neighbor_strain] != 0) continue;
						
						if (BLOCK_TABLE[STARTBLOCK[neighbor_strain]].strain == 0)
							neighbor_pos = BLOCK_TABLE[STARTBLOCK[neighbor_strain]].pos + BLOCKOFFSET[neighbor_strain];
						else
							neighbor_pos = BLOCK_TABLE[STARTBLOCK[neighbor_strain]].strainpos + BLOCKOFFSET[neighbor_strain];
					
					 	
						if (*(GENOME + neighbor_pos) != NULL) {
						
						 	chromosome_director_neighbor = *(GENOME + neighbor_pos);
	
							// Is the chrom director from actual read?
							if  (chromosome_director_neighbor < (CHROMOSOME_ENTRY_OPERATOR->entries+CHROMOSOME_ENTRY_OPERATOR->used) && 
								(chromosome_director_neighbor->genome_pos == neighbor_pos)) {

								c = 0;
								while (chromosome_director_neighbor != NULL) {
	
	
									if (chromosome_director_neighbor->chromosome != genome_chr) {// || chromosome_director_neighbor->strand != strand) {
										chromosome_director_neighbor = chromosome_director_neighbor->next;
										continue;
									}
	
									// if we are on Ref and not in a superblock, try to extend all chrom.directors of neighbor slot!
									if (chromosome_director_neighbor->strain != neighbor_strain) {
										chromosome_director_neighbor = chromosome_director_neighbor->next;
										continue;
									}
	

									neighbor = chromosome_director_neighbor->mapping_entries;

//if (read_num == num) { printf("MM neighbor hit: "); printhit(neighbor->hit); printf("  Found a potential entry, readops(neighbor)=%d, (actual)=%d\n",neighbor->readpos,readpos); }
										
										// the neighbor must end exactly one (mismatching) base before this seed starts
										if (neighbor->readpos == readpos - INDEX_DEPTH - 1) {
//if (read_num == num) printf("  Readpos matches\n");
											// NOTE(review): unlike the adjacent-neighbor case above, neighbor->hit
											// is dereferenced here without a NULL check.
											if ((neighbor->hit)->mismatches < NUM_EDIT_OPS && (neighbor->hit)->mismatches-(neighbor->hit)->gaps < NUM_MISMATCHES) {
//if (read_num == num) printf("  Fancy! Mismatches < 4\n");
												
												// 1) update neighbor hit
												hit = neighbor->hit;
												oldlength = hit->length;
												
												// record the spanned mismatch as an edit operation
												if (reverse != 2) {
													hit->edit_op[hit->mismatches].pos = readpos - 1;
													hit->edit_op[hit->mismatches].mm = 1;
												}
												else {
													hit->startblock = block;
													hit->blockoffset = pos;
													hit->strainpos = hit->strainpos - INDEX_DEPTH - 1;
													hit->edit_op[hit->mismatches].pos = READ_LENGTH - readpos + 2;
													hit->edit_op[hit->mismatches].mm = 1;
												}
												hit->length += INDEX_DEPTH + 1;
												hit->mismatches++;
//if (read_num == num) printf("  Mismatch at pos %d, #mm=%d\n",hit->edit_op[hit->mismatches-1].pos, hit->mismatches);
//if (read_num == num) printhit(hit);
												
												// 2) create own mapping entry
												mapping_entry = create_mapping_entry(&genome_pos, &genome_chr, &neighbor_strain, (read_num == num));
												mapping_entry->readpos = readpos;
												mapping_entry->hit = hit;
												
												size_hit(hit, &oldlength);
												
												if (hit->strain == strain) extended = 1;
											}
										}
									
									chromosome_director_neighbor = chromosome_director_neighbor->next;

								} // for all chrom.directors
							}
						}
						
						SEQS[neighbor_strain] = -1;
						if (BRANCHES == 0) break;
					} // for all strains
					
				} // MM extension
				
			} // readpos > 1
			
	
			// for MM=0: if potential hit doesn't start at readpos 1, it cannot become perfect, thus it is not even allocated:
			if ( !extended && !(NUM_MISMATCHES == 0 && readpos != 1) ) {

				// create new hit:
				
//if (read_num == num) printf("No! Need my own hit structure\n");

				hit = alloc_hit();
				
				mapping_entry = create_mapping_entry(&genome_pos, &genome_chr, &strain, (read_num == num));
				mapping_entry->readpos = readpos;
				mapping_entry->hit = hit;
				
				hit->chromosome = genome_chr;
				
				// an old length below INDEX_DEPTH tells size_hit() that the hit is not in any list yet
				oldlength = INDEX_DEPTH - 1;
				
				hit->readpos = readpos;
				hit->startblock = block;
				hit->blockoffset = pos;
				hit->strain = strain;
				hit->strainpos = (strain != 0)? BLOCK_TABLE[block].strainpos: BLOCK_TABLE[block].pos;
				hit->strainpos += pos + 1;
				hit->length = INDEX_DEPTH;
				
				size_hit(hit, &oldlength);

			}

		} //end of for each seed on read

		reverse -= 2; // 1->-1, 2->0, 4->2

	} //end of while (for each strand)
	
	return(1);
}


// Looks up (or creates) the chromosome director for the given genome position,
// chromosome and strain and returns the head of its mapping entry list,
// allocating a mapping entry if the director has none yet.
//
// Parameters:
//   genome_pos - index into GENOME
//   genome_chr - chromosome number the director has to match
//   strain     - strain the director has to match
//   num        - debug switch of the caller; not used here
//
// Returns NULL if alloc_chromosome_entry() fails (container too small), which
// means the current read has to be cancelled.
MAPPING_ENTRY *create_mapping_entry(int *genome_pos, int *genome_chr, unsigned short int *strain, char num)
{
	CHROMOSOME_ENTRY *director, *appended, *used_end;

	(void) num;

	director = GENOME[*genome_pos];

	if (director == NULL) {
		// empty slot: the fresh director becomes the head of the slot
		director = alloc_chromosome_entry(genome_pos, genome_chr, strain);
		if (director == NULL) return NULL;

		GENOME[*genome_pos] = director;
	}
	else {
		// A director belongs to the current read only if it lies inside the used
		// part of the container AND points back to this very genome position.
		used_end = CHROMOSOME_ENTRY_OPERATOR->entries + CHROMOSOME_ENTRY_OPERATOR->used;

		if (director >= used_end || director->genome_pos != *genome_pos) {
			// left over from a former read: replace it
			director = alloc_chromosome_entry(genome_pos, genome_chr, strain);
			if (director == NULL) return NULL;

			GENOME[*genome_pos] = director;
		}

		// Walk the director list until chromosome and strain match or the tail is reached.
		while (director->next != NULL) {
			if (director->strain == *strain && director->chromosome == *genome_chr) break;
			director = director->next;
		}

		// Reached the tail without a match: insert a new director behind it.
		if (director->chromosome != *genome_chr || director->strain != *strain) {
			appended = alloc_chromosome_entry(genome_pos, genome_chr, strain);
			if (appended == NULL) return NULL;

			appended->next = director->next;
			director->next = appended;

			director = appended;
		}
	}

	// Make sure the director owns a mapping entry and hand it out.
	if (director->mapping_entries == NULL) {
		director->mapping_entries = alloc_mapping_entry();
	}

	return director->mapping_entries;
}


// Walks `len` positions to the left of (block, blockpos) through the block
// graph, following prev_block links across block borders.
//
// On success the reached block and the offset within it are stored in
// STARTBLOCK[strain] / BLOCKOFFSET[strain] and 0 is returned. If the first
// block of the chromosome is reached before `len` is used up, the remaining
// length (> 0) is returned.
//
// When walking on the reference (strain == 0) with stick_to_strain == 0 and
// outside of a superblock, the function recurses into every strain block
// linked via next_strain_end whose SEQS entry is negative, stores the result
// of that recursion in SEQS[<strain>] and sets BRANCHES = 1.
//
// Parameters:
//   block, blockpos - start position (block table index and offset in the block)
//   strain          - strain to walk on (0 = reference)
//   len             - number of positions to go left
//   stick_to_strain - non-zero: never branch into other strains
int get_genome_pos_left(unsigned int block, unsigned int blockpos, unsigned int strain, unsigned int len, char stick_to_strain)
{
	unsigned int prevblock, prevprevblock, startpos = blockpos, block_seq_len;
	// on the reference, a non-zero strainpos marks a block inside a superblock
	char in_superblock = (strain == 0) && BLOCK_TABLE[block].strainpos;

	while (len != 0) {
		
		block_seq_len = strlen(BLOCK_TABLE[block].seq);

		// NOTE(review): compares an unsigned length with the int result of abs() -- confirm the conversion is intended.
		if (block_seq_len < abs(BLOCK_TABLE[block].indel_offset)) {
			if (BLOCK_TABLE[block].prev_block == 0) {
				// no predecessor: fall back to the position to the right; the return value of that call is discarded
				get_genome_pos_right(block, blockpos, strain, 1, 1);
				return len;
			}
			else {
				block = BLOCK_TABLE[block].prev_block;
				block_seq_len = strlen(BLOCK_TABLE[block].seq);
				blockpos = block_seq_len;
				continue;
			}
		}
		
		if (blockpos > block_seq_len + BLOCK_TABLE[block].indel_offset) blockpos = block_seq_len + BLOCK_TABLE[block].indel_offset + 1;
		
		if (blockpos >= len) {
			// target lies within the current block
			startpos = blockpos - len;
			len = 0;
		}
		else {
			
			// consume the rest of this block and continue in the previous one
			len = len - blockpos;
			
			prevblock = BLOCK_TABLE[block].prev_block;
			if (prevblock == 0) {
				BLOCKOFFSET[strain] = 0;
				STARTBLOCK[strain] = block;
				return len;	// block is first block of chromosome -> read could not have been mapped!!
			}
			
			prevprevblock = BLOCK_TABLE[prevblock].next_strain_end;
				
			if (strain == 0) {
				if (!stick_to_strain && !in_superblock) {
					while (prevprevblock != 0) {
						if (SEQS[BLOCK_TABLE[prevprevblock].strain] < 0) {
							SEQS[BLOCK_TABLE[prevprevblock].strain] = get_genome_pos_left(prevprevblock, strlen(BLOCK_TABLE[prevprevblock].seq), BLOCK_TABLE[prevprevblock].strain, len, 1);	// recursion with new strain
							BRANCHES = 1;
							
							STRAINPOS[BLOCK_TABLE[prevprevblock].strain] = BLOCK_TABLE[prevprevblock].strainpos + (strlen(BLOCK_TABLE[prevprevblock].seq) - len) + 1;
						}
						
						prevprevblock = BLOCK_TABLE[prevprevblock].next_strain_end;
					}
				}
				block = prevblock;
			}
			else {
				if (BLOCK_TABLE[prevblock].strain == 0 && prevblock != 0 && prevprevblock != 0) {
					block = prevblock;
					prevblock = prevprevblock;
					// search the strain block belonging to `strain`; if none is found, block stays on the reference block
					// NOTE(review): BLOCK_TABLE[prevblock] is read before prevblock is tested against 0.
					while (BLOCK_TABLE[prevblock].strain != strain && prevblock != 0) {
						prevblock = BLOCK_TABLE[prevblock].next_strain_end;
					}
					if (BLOCK_TABLE[prevblock].strain == strain) block = prevblock;
				}
				else {
					block = prevblock;
				}
			}
				
			
			// NOTE(review): terminates with exit status 0 although this is an error condition.
			if (block == 0) { fprintf(stderr, "BLOCK = 0!!\n"); exit(0); }
				
		}
		blockpos = strlen(BLOCK_TABLE[block].seq);
		
	} // while len != 0
	
	
	BLOCKOFFSET[strain] = startpos;
	STARTBLOCK[strain] = block;
	
	return len;
}

// Walks to the right of (block, blockpos) through the block graph, following
// next_block links across block borders.
//
// The start position is first advanced by one (within the block, or to offset
// 0 of the following block); from there `len` positions are consumed.
//
// On success the reached block is stored in STARTBLOCK[strain], the offset
// within it in BLOCKOFFSET[strain], and 0 is returned. If the last block of
// the chromosome is reached first, the remaining length is returned.
//
// When walking on the reference (strain == 0) with stick_to_strain == 0 and
// outside of a superblock, the function recurses into every strain block
// linked via next_strain_front whose SEQS entry is negative, stores the result
// in SEQS[<strain>] and sets BRANCHES = 1.
//
// Parameters:
//   block, blockpos - start position (block table index and offset in the block)
//   strain          - strain to walk on (0 = reference)
//   len             - number of positions to go right
//   stick_to_strain - non-zero: never branch into other strains
int get_genome_pos_right(unsigned int block, int blockpos, unsigned int strain, unsigned int len, char stick_to_strain)
{
	unsigned int nextblock, nextnextblock, lastlen = len, block_seq_len;
	// on the reference, a non-zero strainpos marks a block inside a superblock
	char in_superblock = (strain == 0) && BLOCK_TABLE[block].strainpos;
	int indel_offset;
	
	/*while (((signed int) strlen(BLOCK_TABLE[block].seq) + BLOCK_TABLE[block].indel_offset) < 0) {
		block = BLOCK_TABLE[block].next_block;
		blockpos = 0;
	}*/

	// step one position to the right of the start
	if (strlen(BLOCK_TABLE[block].seq) - 1 != blockpos) blockpos++;
	else {
		// jump to next block:
		nextblock = BLOCK_TABLE[block].next_block;
		if (nextblock == 0) return len;

		nextnextblock = BLOCK_TABLE[nextblock].next_strain_front;
		
		if (strain == 0) block = nextblock;
		else if (nextnextblock == 0) block = nextblock; // not at a conserved block/superblock border
		else {
			block = nextblock;		// block must be now the first ref block of the superblock
                	nextblock = nextnextblock;	// first strain block in the superblock
			// NOTE(review): BLOCK_TABLE[nextblock] is read before nextblock is tested against 0.
                        while (BLOCK_TABLE[nextblock].strain != strain && nextblock != 0) { // every following strain block
                        	nextblock = BLOCK_TABLE[nextblock].next_strain_front;
                        }
                        if (BLOCK_TABLE[nextblock].strain == strain) block = nextblock; // until correct strain block is found, OR block remains on first ref block of superblock
		}

		blockpos = 0;
	}
	
	while (len != 0) {
		
		// a negative indel_offset shortens the usable part of the block
		if (BLOCK_TABLE[block].indel_offset < 0) indel_offset = BLOCK_TABLE[block].indel_offset + 1;
		else indel_offset = 0;

		block_seq_len = strlen(BLOCK_TABLE[block].seq);
		//if (block_seq_len == blockpos) { DEBUG = 1; }// blockpos = 0; }

		// jump over deletions:
		// NOTE(review): next_block is followed here without checking it against 0.
		if (block_seq_len + indel_offset == 0) {
			block = BLOCK_TABLE[block].next_block;
			block_seq_len = strlen(BLOCK_TABLE[block].seq);
			blockpos = 0;
			continue;
		}
		
		if (block_seq_len + indel_offset - blockpos >= len) {
			
			// here save end pos of hit (pos relative to strain)!!!
			lastlen = len + blockpos - 1;
			
			len = 0;
		}
		else {
			// consume the rest of this block and continue in the next one
			len = len - block_seq_len - indel_offset + blockpos;
			
			nextblock = BLOCK_TABLE[block].next_block;
			
			if (nextblock == 0) {
				return len;	// block is last block of chromosome -> read could not have been mapped!!
			}
			
			nextnextblock = BLOCK_TABLE[nextblock].next_strain_front;
			
			if (strain == 0) {
				if (!stick_to_strain && !in_superblock) {
					while (nextnextblock != 0) {
						if (SEQS[BLOCK_TABLE[nextnextblock].strain] < 0) {
							SEQS[BLOCK_TABLE[nextnextblock].strain] = get_genome_pos_right(nextnextblock, 0, BLOCK_TABLE[nextnextblock].strain, len, 1);	// recursion with new strain
							BRANCHES = 1;
						}
						
						nextnextblock = BLOCK_TABLE[nextnextblock].next_strain_front;
					}
				}
				block = nextblock;
			}
			else {
				if (nextnextblock != 0) {
				//if (BLOCK_TABLE[nextblock].strain == 0 && nextnextblock != 0) { //BLOCK_TABLE[nextnextblock].strain != 0) {
					block = nextblock;
					nextblock = nextnextblock;
					// search the strain block belonging to `strain`; if none is found, block stays on the reference block
					while (BLOCK_TABLE[nextblock].strain != strain && nextblock != 0) {
						nextblock = BLOCK_TABLE[nextblock].next_strain_front;
					}
					if (BLOCK_TABLE[nextblock].strain == strain) block = nextblock;
				}
				else {
					block = nextblock;
				}
			}
			
			lastlen = 0;
		}
		blockpos = 0;
		
	} // while len != 0
	
	STARTBLOCK[strain] = block;
	BLOCKOFFSET[strain] = lastlen;
	
	return len;
}



#if OLD != 1
// Debug helper: prints every occupied GENOME slot in [0, LONGEST_CHROMOSOME).
// For each slot the list of chromosome directors is printed as
// (chromosome+1, genome_pos, strain, has_next) followed by the hit of the
// director's first mapping entry, and finally the number of directors in the slot.
// Directors without a mapping entry or without a hit are reported as "null".
void printgenome()
{
	unsigned int i, c;
	CHROMOSOME_ENTRY *ce;

	printf("G E N O M E: \n");

	for (i=0; i!=LONGEST_CHROMOSOME; ++i) {
		ce = *(GENOME+i);
		if (ce == NULL) continue;

		c = 0;
		printf("%u: ", i);	// i and c are unsigned, hence %u

		for (; ce != NULL; ce = ce->next) {
			++c;
			printf("(%d, %d, %d, %d) ", ce->chromosome+1, ce->genome_pos, ce->strain, (ce->next!=NULL));

			// a director may not carry a mapping entry / hit (yet)
			if (ce->mapping_entries != NULL && ce->mapping_entries->hit != NULL) {
				printhit(ce->mapping_entries->hit);
			}
			else {
				printf("null\n");
			}
		}
		printf("((%u))\n", c);
	}
}
#endif

// Moves a hit into the hit list that belongs to its current length.
//
// HIT_LISTS_OPERATOR holds one doubly linked list of hits per hit length.
// The hit is first unlinked from the list of its former length (*oldlength),
// unless that length is below INDEX_DEPTH, in which case the hit was not part
// of any list yet. It is then pushed to the front of the list for hit->length,
// and LONGEST_HIT is raised if necessary.
//
// Always returns 1.
int size_hit(HIT *hit, unsigned int *oldlength)
{
	HIT *before, *after, *head;

	// Unlink from the old list. Shortest possible hits are not under control
	// of the operator so far, so there is nothing to unlink for them.
	if (*oldlength > INDEX_DEPTH - 1) {
		before = hit->last;
		after = hit->next;

		if (before != NULL) {
			before->next = after;
		}
		else {
			// hit was the list head, so the operator itself must be re-linked
			*(HIT_LISTS_OPERATOR+*oldlength) = after;
		}

		if (after != NULL) {
			after->last = before;
		}
	}

	// Push to the front of the list for the new length.
	head = *(HIT_LISTS_OPERATOR+hit->length);
	if (head != NULL) {
		head->last = hit;
	}
	hit->next = head;
	hit->last = NULL;
	*(HIT_LISTS_OPERATOR+hit->length) = hit;

	if (hit->length > LONGEST_HIT) {
		LONGEST_HIT = hit->length;
	}

	return(1);
}



// Called once per read: evaluates all hits collected in HIT_LISTS_OPERATOR,
// from the longest possible hit length (READ_LENGTH) down to INDEX_DEPTH.
//
// Hits spanning the whole read are inserted into the score list if they have
// at most NUM_EDIT_OPS mismatches. Shorter hits with fewer than NUM_MISMATCHES
// mismatches are handed to the alignment (k-bound if gaps are allowed, simple
// otherwise).
//
// The scan stops early once only perfect hits are wanted (NUM_EDIT_OPS == 0)
// and the full-length list has been handled, or when the hit length falls
// below HITLEN_LIMIT.
//
// Returns 1 if at least one hit was reported, 0 otherwise.
int browse_hits() 
{
	HIT *cur;
	int len;
	char perfect_counted = 0;
	char reported = 0;

	// browse hit_list foreach hitlength:
	for (len = READ_LENGTH; len != INDEX_DEPTH - 1; --len) {

		// stop if only perfect reads should be reported, or if the hitlength limit is reached
		if (((NUM_EDIT_OPS == 0) && (len < READ_LENGTH)) || len == HITLEN_LIMIT - 1) {
			return reported;
		}

		// foreach hit with hitlength len:
		for (cur = *(HIT_LISTS_OPERATOR + len); cur != NULL; cur = cur->next) {

			if (len != READ_LENGTH) {
				// Hit is shorter than the read: try to align the remainder.
				// for MM=1: hit must either start at readpos 1 or end at end of read, otherwise more than 1 MM
				if (cur->mismatches < NUM_MISMATCHES) {
					if (NUM_GAPS != 0) {
						// KBOUND:
						if (prepare_kbound_alignment(cur)) reported = 1;
					}
					else {
						// SIMPLE:
						if (align_hit_simple(cur)) reported = 1;
					}
				}
			}
			else {
				// Hit spans the whole read.
				if (STATISTICS && cur->mismatches == 0) {

					// perfectly matching reads are counted only once per read
					if (!perfect_counted) {
						PERFECT_READS++;
						perfect_counted = 1;
					}

					// perfect hits are counted per strand
					if (cur->chromosome > 0) PERFECT_HITS++;
					else PERFECT_HITS_REV++;
				}

				// report match:
				if (cur->mismatches <= NUM_EDIT_OPS) {

					// insert hit into HITS_BY_SCORE
					insert_into_scorelist(cur, 1);

					reported = 1;
					if (STATISTICS) NOT_ALIGNED[0]++;

					// best-hit strategy: from now on only hits at least as good are of interest
					if (!ALL_HIT_STRATEGY && cur->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = cur->mismatches;
				}
			}

			if (STATISTICS) HITS_LEN[cur->length]++;

		} // foreach hit in list

	} // foreach hitlength

	return reported;
}


// Inserts a hit at the front of the HITS_BY_SCORE list that belongs to its score.
//
// Parameters:
//   hit - hit to insert
//   d   - non-zero: run check_duplicate() first and skip hits that are duplicates
//
// Returns 0 if the hit was rejected as duplicate, 1 if it was inserted.
// With the best-hit strategy NUM_EDIT_OPS is lowered to the hit's number of
// mismatches if that is smaller.
//
// NOTE(review): the score is used as array index without a range check --
// confirm that it cannot become negative for the configured scores.
int insert_into_scorelist(HIT* hit, char d)
{
	int score;

	if (d && check_duplicate(hit)) return 0;

	score = (hit->mismatches-hit->gaps) * MM_SCORE + hit->gaps * GAP_SCORE - (READ_LENGTH-hit->mismatches) * M_SCORE;

	// a non-empty list becomes the successor chain of the new head;
	// for an empty list the hit's successor is left untouched
	if (HITS_BY_SCORE[score].num != 0) {
		hit->same_eo_succ = HITS_BY_SCORE[score].hitpointer;
	}
	HITS_BY_SCORE[score].hitpointer = hit;

	HITS_BY_SCORE[score].num++;
	HITS_IN_SCORE_LIST++;

	if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;

	return 1;
}



// Checks whether an equivalent hit has already been registered for the current
// read and registers the given hit otherwise.
//
// The hit is filed under the GENOME slot of its start position (startblock +
// blockoffset). Two hits count as duplicates if they are filed under the same
// chromosome director and agree in length, strain and score.
//
// Returns 1 if the hit is a duplicate, 0 if it was newly registered. 0 is also
// returned if a chromosome entry could not be allocated (container too small).
//
// NOTE(review): after the list walk only the chromosome is compared, not the
// strain (create_mapping_entry() compares both) -- confirm this is intended.
int check_duplicate(HIT* hit)
{
	int pos_slot;
	// Scores are kept as int, matching insert_into_scorelist(); a narrower
	// type would truncate them and could make different scores compare equal.
	int score1, score2;
	CHROMOSOME_ENTRY *chromosome_director, *chromosome_director_new;
	MAPPING_ENTRY *mapping_entry;
	char fresh_slot = 0;	// set if the GENOME slot was empty before this call

	if (hit->strain == 0) pos_slot = BLOCK_TABLE[hit->startblock].pos + hit->blockoffset;
	else pos_slot = BLOCK_TABLE[hit->startblock].strainpos + hit->blockoffset;

	if (*(GENOME+pos_slot) == NULL) {
		fresh_slot = 1;
		chromosome_director = alloc_chromosome_entry(&pos_slot, &(hit->chromosome), &(hit->strain));

		if (!chromosome_director) return(0);	// chrom_container_size too small -> cancel this read
		*(GENOME + pos_slot) = chromosome_director;
	}
	else {

		chromosome_director = *(GENOME + pos_slot);

		// is chrom_director from the actual read or from a former one?
		if (chromosome_director >= (CHROMOSOME_ENTRY_OPERATOR->entries+CHROMOSOME_ENTRY_OPERATOR->used)
			|| chromosome_director->genome_pos != pos_slot)
		{
			// it's from a former read, thus we need a new chrom_director:
			chromosome_director = alloc_chromosome_entry(&pos_slot, &(hit->chromosome), &(hit->strain));

			if (!chromosome_director) return(0);	// chrom_container_size too small -> cancel this read
			*(GENOME + pos_slot) = chromosome_director;
		}
	}

	// Search for chromosome_director for the correct chromosome/strand/strain
	while (chromosome_director->next != NULL && (chromosome_director->strain != hit->strain ||
			chromosome_director->chromosome != hit->chromosome))
	{
		chromosome_director = chromosome_director->next;
	}

	if (chromosome_director->chromosome != hit->chromosome) {

		chromosome_director_new = alloc_chromosome_entry(&pos_slot, &(hit->chromosome), &(hit->strain));
		if (!chromosome_director_new) return(0);

		chromosome_director_new->next = chromosome_director->next;
		chromosome_director->next = chromosome_director_new;
		chromosome_director = chromosome_director_new;
	}

	if (chromosome_director->mapping_entries == NULL) {
		// first hit at this position
		mapping_entry = alloc_mapping_entry();
		chromosome_director->mapping_entries = mapping_entry;
		mapping_entry->hit = hit;
		return 0;
	}

	// a director that was just created for an empty slot must not carry entries
	if (fresh_slot == 1) {
		printf("!!!!!!!!!!!Found entry in chr dir at longest_genome pos %d  --  %p\n", pos_slot, (void *) chromosome_director->mapping_entries	);
		exit(1);
	}

	score1 = (hit->mismatches-hit->gaps) * MM_SCORE + hit->gaps * GAP_SCORE - (READ_LENGTH - hit->mismatches) * M_SCORE;

	for (mapping_entry = chromosome_director->mapping_entries; mapping_entry != NULL; mapping_entry = mapping_entry->succ) {
		score2 = (mapping_entry->hit->mismatches-mapping_entry->hit->gaps) * MM_SCORE
				 + mapping_entry->hit->gaps * GAP_SCORE - (READ_LENGTH - mapping_entry->hit->mismatches) * M_SCORE;

		if (mapping_entry->hit->length == hit->length && mapping_entry->hit->strain == hit->strain && score1 == score2) {
			if (STATISTICS) REDUNDANT++;
			return 1;
		}
	}

	// no hit with same strain, same end pos of read and same score could have been found -> create another entry
	// and insert it to the front of the mapping list
	mapping_entry = alloc_mapping_entry();
	mapping_entry->succ = chromosome_director->mapping_entries;
	chromosome_director->mapping_entries = mapping_entry;
	mapping_entry->hit = hit;

	return 0;
}


// For debugging: decodes index slot number `n` into its nucleotide sequence
// of length INDEX_DEPTH. Digit i of the sequence is n / POWER[i] after the
// higher digits have been removed; digits 0..3 map to A, C, G, T.
//
// Returns a newly allocated, NUL-terminated string that the caller has to
// free(), or NULL if the allocation fails. A digit outside 0..3 (which can
// only occur for an out-of-range slot number, assuming POWER[i] holds 4^i --
// TODO confirm) is rendered as 'N'.
char *get_seq(unsigned int n)
{
	char *seq = (char *) malloc ((INDEX_DEPTH+1)*sizeof(char));
	int i, c;

	if (seq == NULL) return NULL;

	for (i=INDEX_DEPTH-1; i>=0; --i) {
		c = (int) (n / POWER[i]);
		switch (c)
		{
			case 0: seq[i] = 'A';
					break;
			case 1: seq[i] = 'C';
					break;
			case 2: seq[i] = 'G';
					break;
			case 3: seq[i] = 'T';
					break;
			default: seq[i] = 'N';	// never leave the character uninitialised
					break;
		}
		n -= (int) (c * POWER[i]);
	}
	seq[INDEX_DEPTH] = '\0';
	return seq;
}

