// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008/09 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "genomemapper.h"

// Forward declarations: both routines are defined further down in this file
// but are already called from prepare_kbound_alignment().
int kbound_overhang_alignment(HIT* hit, int offset, int readseq_start);
int kbound_global_alignment(HIT* hit);

/*
 * Tells whether c is one of the four unambiguous upper-case nucleotide
 * letters. Returns 1 for 'A', 'C', 'G' or 'T' and 0 for every other
 * character (lower-case letters included).
 */
char unique_base(char c)
{
	switch (c) {
		case 'A':
		case 'C':
		case 'G':
		case 'T':
			return 1;
		default:
			return 0;
	}
}

/*
 * Gapless verification of a seed hit: the whole read (global READ, READ_LENGTH
 * bases) is compared base by base with the sequence around the hit, counting
 * mismatches only. Every candidate that stays within NUM_MISMATCHES is handed
 * to insert_into_scorelist().
 *
 * hit: seed hit to verify. It is modified in place (strainpos, startblock,
 *      blockoffset, length, mismatches, edit_op). For a strain other than the
 *      hit's own one a fresh hit is obtained from alloc_hit().
 *
 * Returns 1 if at least one insert_into_scorelist() call reported success,
 * otherwise 0.
 *
 * Side effects visible in this function: resets the global BRANCHES, reads and
 * writes the per-strain globals SEQ, SEQS, STARTBLOCK, BLOCKOFFSET and
 * STRAINPOS, and may lower NUM_EDIT_OPS when ALL_HIT_STRATEGY is off.
 *
 * A negative hit->chromosome selects the branches that complement the sequence
 * with get_compl_base() (presumably the reverse strand -- confirm against the
 * HIT definition).
 */
int align_hit_simple(HIT* hit)
{
	unsigned int strain, len;
	// private copy of the incoming hit; serves as the template for every strain handled below
	HIT *orihit = (HIT *) malloc(sizeof(HIT));;
	// scratch buffer receiving the output of decode_strainseq()
	char *seq = (char *) malloc(4 + MAX_READ_LENGTH * sizeof(char));
	char any_success = 0, success;
	int j,k;
	// true only for a hit on strain 0 whose start block has a non-zero strainpos
	char in_superblock = (hit->strain == 0) && BLOCK_TABLE[hit->startblock].strainpos;
	
	//////////////////////////////////////
	// get sequence(s) out of block table:
	BRANCHES = 0;
	
	// stores all sequences to which mapping has to occur in SEQ and the corresponding strains in SEQS!
	// len = number of bases between the start of the read and the hit position
	if (hit->chromosome < 0) len = READ_LENGTH - hit->readpos + 1;
	else len = hit->readpos - 1;

	// If the read start lies before the beginning of the current block, the start
	// position is looked up with get_genome_pos_left(); otherwise it is computed directly.
	if (len > hit->blockoffset) SEQS[hit->strain] = get_genome_pos_left(hit->startblock, hit->blockoffset, hit->strain, len, in_superblock);
	else {
		SEQS[hit->strain] = 0;
		STARTBLOCK[hit->strain] = hit->startblock;
		BLOCKOFFSET[hit->strain] = hit->blockoffset - len;
		STRAINPOS[hit->strain] = BLOCK_TABLE[hit->startblock].strainpos + BLOCKOFFSET[hit->strain];
	}
	
	// SEQS == 0 means the start position could be determined: move strainpos to
	// the read start and fetch READ_LENGTH bases to the right of it.
	if (SEQS[hit->strain] == 0) {
		if (hit->chromosome < 0) {
			hit->strainpos = hit->strainpos - (READ_LENGTH - hit->readpos + 1);
		}
		else {
			hit->strainpos = hit->strainpos - (hit->readpos - 1);
		}
		get_readseq_right(STARTBLOCK[hit->strain], BLOCKOFFSET[hit->strain], hit->strain, READ_LENGTH, in_superblock, 0, 0);
		SEQ[hit->strain][READ_LENGTH] = '\0';
	}
	else {
		// no usable sequence for this strain: mark it as invalid
		SEQ[hit->strain][0] = '\0';
		SEQS[hit->strain] = -1;
	}
	
	// adjusting the start pos of hit to start pos of read (print_alignment demands this)	STARTBLOCK and STRAINOFFSET was set in get_genome_pos_left
	hit->startblock = STARTBLOCK[hit->strain];
	hit->blockoffset = BLOCKOFFSET[hit->strain];

	memcpy(orihit, hit, sizeof(HIT));
		
	//////////////////////////////////////

	
	// Without BRANCHES only the hit's own strain is processed (the loop is left
	// through the break at its end); with BRANCHES every strain index 0..NUM_STRAINS is tried.
	for (k=0; k<=NUM_STRAINS; ++k) {

		success = 1;
		
		if (!BRANCHES) strain = orihit->strain;
		else strain = k;
		
		// skip strains without a valid sequence and reset their slot
		if (SEQS[strain] != 0) {
			SEQ[strain][0] = '\0';
			SEQS[strain] = -1;
			continue;
		}
		
		if (strlen(SEQ[strain]) == 0) { // if left superblock has strain seq, but not right superblock -> fill up to the right
			SEQS[strain] = get_readseq_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, READ_LENGTH, 0, 1, 0);
			
			if (SEQS[strain] != 0) {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}
		}

		// replace the stored sequence by its decoded form (via the scratch buffer)
		decode_strainseq(seq, SEQ[strain]);
		strcpy(SEQ[strain], seq);
		
		
		// create hit if necessary
		if (strain != orihit->strain) {
			hit = alloc_hit();
			
			hit->chromosome = orihit->chromosome;
			if (hit->chromosome > 0) {
				hit->readpos = orihit->readpos;
			}
			else {
				hit->readpos = orihit->readpos + orihit->length - INDEX_DEPTH;
			}
			hit->strain = strain;
			hit->strainpos = STRAINPOS[strain];

			hit->mismatches = 0;

			hit->startblock = STARTBLOCK[strain];
			hit->blockoffset = BLOCKOFFSET[strain];
			hit->length = INDEX_DEPTH;
			
		}
		else memcpy(hit, orihit, sizeof(HIT));

		// from read[0] to read[hit->readpos]
		for (j=0; j!=hit->readpos-1; ++j) {
					/*if (DEBUG){ if (hit->orientation == '+') {printf("%d,%c ",hit->start - hit->readpos + j,CHR_SEQ[hit->chromosome][hit->start - hit->readpos + j]);}
						else printf("%d,%c ",hit->end + hit->readpos -2 -j,get_compl_base(CHR_SEQ[hit->chromosome][hit->end + hit->readpos - 2 - j]));}*/

			if (	hit->chromosome > 0
				&& (	(SEQ[hit->strain][j] != READ[j])  
					|| !(unique_base(READ[j]))		// [XX] should also be a mismatch!
					// if read[j]=X and chr_seq not, then first or-condition is automatically true -> genome sequence doesn't have to be checked
				) 
			) 
			{
				// create mismatch:
				// (the edit operation is only stored while there is room; the counter is always increased)
				if (hit->mismatches < NUM_MISMATCHES) {
					(hit->edit_op[hit->mismatches]).pos = j+1;
					(hit->edit_op[hit->mismatches]).mm = 1;
				}
		
				hit->mismatches++;
			}
				
			if (	hit->chromosome < 0 
				&& (    (get_compl_base(SEQ[hit->strain][j]) != READ[j])  
					|| !(unique_base(READ[j]))
				)
			)
			{
				// create mismatch:
				// (position is counted from the other end of the read: READ_LENGTH - j)
				if (hit->mismatches < NUM_MISMATCHES) {
					hit->edit_op[hit->mismatches].pos = READ_LENGTH - j;
					hit->edit_op[hit->mismatches].mm = 1;
				}
		
				hit->mismatches++;
			}
			
			// stop as soon as the mismatch budget is exceeded
			if (hit->mismatches > NUM_MISMATCHES) {
				success = 0;
				break;
			}
		}
		
		if (success == 0) {
			SEQ[hit->strain][0] = '\0';
			SEQS[hit->strain] = -1;
			continue;		// next strain, current one was unsuccessful
		}
		
		// from read[hit->readpos + hitlength] to read[READ_LENGTH - 1]
		// NOTE(review): the loop below starts at index hit->length, not at
		// hit->readpos - 1 + hit->length as the heading above suggests -- confirm this is intended.
		for (j = hit->length; j < READ_LENGTH; ++j) {
	
			if (	(hit->chromosome > 0) 
				&& (    (SEQ[hit->strain][j] != READ[j])  
					|| !(unique_base(READ[j]))		// [XX] should also be a mismatch!
					// if read[j]=X and chr_seq not, then first or-condition is automatically true -> genome sequence doesn't have to be checked
				) 
			)
			{
				// create mismatch:
				if (hit->mismatches < NUM_MISMATCHES) {		
					(hit->edit_op[hit->mismatches]).pos = j+1;
					(hit->edit_op[hit->mismatches]).mm = 1;
				}
		
				hit->mismatches++;
			}
	
				
			if (	(hit->chromosome < 0)
				&& (    (get_compl_base(SEQ[hit->strain][j]) != READ[j])  
					|| !(unique_base(READ[j]))
				)
			)
			{
				// create mismatch:
				if (hit->mismatches < NUM_MISMATCHES) {
					(hit->edit_op[hit->mismatches]).pos = READ_LENGTH - j;
					(hit->edit_op[hit->mismatches]).mm = 1;
				}
		
				hit->mismatches++;
			}
			
			if (hit->mismatches > NUM_MISMATCHES) {
				success = 0;
				break;
			}
			
			// update hit:
			// (the hit grows by one base for every position that was accepted)
			hit->length++;
			
		}
		
		
		if (success) {
			
			// insert hit into HITS_BY_SCORE
			if (insert_into_scorelist(hit, 1)) any_success = 1; 
			
			// unless all hits are wanted, tighten the global edit-op limit to the best hit found so far
			if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;
		}

		// reset the sequence slot of the strain that was just processed
		SEQ[hit->strain][0] = '\0';
		SEQS[hit->strain] = -1;
		if (!BRANCHES) break;
	} //for all strains

	free(seq);
	free(orihit);

	return any_success;
}


/*
 * Verifies a seed hit with the k-banded alignment routines of this file and
 * inserts every acceptable alignment into the score list.
 *
 * Steps:
 *   1. Determine where the read (plus NUM_GAPS extra bases on either side)
 *      starts in the block table and fetch the corresponding sequence(s) into
 *      the per-strain globals SEQ / SEQS.
 *   2. For each strain with a usable sequence:
 *        - if OVERHANG_ALIGNMENT is off, run kbound_global_alignment() only;
 *        - otherwise align the part in front of the seed and the part behind
 *          it with kbound_overhang_alignment(). A result of 0 (gaps on the
 *          best path) triggers kbound_global_alignment(), a result of -1
 *          (too many mismatches) discards the strain.
 *      An overhang of exactly one base is handled without running an
 *      alignment.
 *
 * hit: seed hit; modified in place. For strains other than the hit's own one
 *      a new hit is obtained from alloc_hit().
 *
 * Returns 1 if at least one insert_into_scorelist() call reported success,
 * otherwise 0.
 *
 * Side effects visible here: resets BRANCHES, reads and writes SEQ, SEQS,
 * STARTBLOCK, BLOCKOFFSET and STRAINPOS, and may lower NUM_EDIT_OPS when
 * ALL_HIT_STRATEGY is off.
 */
int prepare_kbound_alignment(HIT* hit)
{

	int strain, offset;
	unsigned int k, len;
	// private copy of the incoming hit; template for every strain handled below
	HIT *orihit = (HIT *) malloc(sizeof(HIT));
	// scratch buffer receiving the output of decode_strainseq()
	// NOTE(review): up to READ_LENGTH + 2*NUM_GAPS bases are requested below while
	// this buffer holds MAX_READ_LENGTH bytes -- confirm that decode_strainseq()
	// can never write more than that.
	char *seq = malloc(MAX_READ_LENGTH * sizeof(char));
	char any_success = 0;
	// true only for a hit on strain 0 whose start block has a non-zero strainpos
	char in_superblock = (hit->strain == 0) && BLOCK_TABLE[hit->startblock].strainpos;


	//////////////////////////////////////
	// get sequence(s) out of block table:

	BRANCHES = 0;

	// stores all sequences to which mapping has to occur in SEQ and the corresponding strains in SEQS!
	// len = distance from the hit back to the read start, widened by NUM_GAPS
	if (hit->chromosome < 0) len = READ_LENGTH - hit->length - hit->readpos + 1 + NUM_GAPS;
	else len = hit->readpos - 1 + NUM_GAPS;

	if (len > hit->blockoffset) SEQS[hit->strain] = get_genome_pos_left(hit->startblock, hit->blockoffset, hit->strain, len, in_superblock);
	else {
		SEQS[hit->strain] = 0;
		STARTBLOCK[hit->strain] = hit->startblock;
		BLOCKOFFSET[hit->strain] = hit->blockoffset - len;
		STRAINPOS[hit->strain] = BLOCK_TABLE[hit->startblock].strainpos + BLOCKOFFSET[hit->strain];
	}

	// Up to NUM_GAPS missing bases on the left are tolerated.
	if (SEQS[hit->strain] <= NUM_GAPS) {
		if (hit->chromosome < 0) hit->strainpos = hit->strainpos - (READ_LENGTH - hit->length - hit->readpos + 1);
		else			 hit->strainpos = hit->strainpos - (hit->readpos - 1);
		if (get_readseq_right(STARTBLOCK[hit->strain], BLOCKOFFSET[hit->strain], hit->strain, (NUM_GAPS - SEQS[hit->strain]) + READ_LENGTH + NUM_GAPS, in_superblock, 0, 1) > NUM_GAPS) {
			SEQ[hit->strain][0] = '\0';
			SEQS[hit->strain] = -1;
		}
	}
	else {
		SEQ[hit->strain][0] = '\0';
		SEQS[hit->strain] = -1;
	}

	// adjusting the start pos of hit to start pos of read (print_alignment demands this)	STARTBLOCK and STRAINOFFSET was set in get_readseq_left
	hit->startblock = STARTBLOCK[hit->strain];
	hit->blockoffset = BLOCKOFFSET[hit->strain];

	memcpy(orihit, hit, sizeof(HIT));

	//////////////////////////////////////


	// Without BRANCHES only the hit's own strain is processed (the loop is left
	// through the break at its end); with BRANCHES every strain index 0..NUM_STRAINS is tried.
	for (k=0; k<=NUM_STRAINS; ++k) {

		if (!BRANCHES) strain = orihit->strain;
		else strain = k;

		// skip strains without a usable sequence and reset their slot
		if (SEQS[strain] < 0 || SEQS[strain] > NUM_GAPS) {
			SEQ[strain][0] = '\0';
			SEQS[strain] = -1;
			continue;
		}

		if (strlen(SEQ[strain]) == 0) { // if left superblock has strain seq, but not right superblock -> fill up to the right
			STRAINPOS[strain] += NUM_GAPS; // correcting for front overlap
			SEQS[strain] = get_readseq_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, (NUM_GAPS - SEQS[strain]) + READ_LENGTH + NUM_GAPS, 0, 1, 1);

			if (SEQS[strain] > NUM_GAPS) {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}
		}

		// replace the stored sequence by its decoded form (via the scratch buffer)
		decode_strainseq(seq, SEQ[strain]);
		strcpy(SEQ[strain], seq);

		// create hit if necessary
		if (strain != orihit->strain) {
			hit = alloc_hit();

			// get_readseq_left has delivered the sequence, but the "startpointers" have to be set back to hitstart
			len = strlen(SEQ[strain]) - READ_LENGTH - (NUM_GAPS - SEQS[strain]);
			if (BLOCKOFFSET[strain] + len >= strlen(BLOCK_TABLE[STARTBLOCK[strain]].seq)) {
				get_genome_pos_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, len, 1);
			}
			else {
				BLOCKOFFSET[strain] += len;
			}

			hit->chromosome = orihit->chromosome;
			if (hit->chromosome > 0) {
				hit->readpos = orihit->readpos;
			}
			else {
				hit->readpos = orihit->readpos + orihit->length - INDEX_DEPTH;
			}
			hit->strain = strain;
			hit->strainpos = STRAINPOS[strain];

			hit->mismatches = 0;

			hit->startblock = STARTBLOCK[strain];
			hit->blockoffset = BLOCKOFFSET[strain];
			hit->length = INDEX_DEPTH;
		}
		else {
			memcpy(hit, orihit, sizeof(HIT));
			// get_readseq_left has delivered the sequence, but the "startpointers" have to set back to hitstart
			len = strlen(SEQ[strain]) - READ_LENGTH - (NUM_GAPS - SEQS[strain]);
			if (BLOCKOFFSET[strain] + len >= strlen(BLOCK_TABLE[STARTBLOCK[strain]].seq)) get_genome_pos_right(STARTBLOCK[strain], BLOCKOFFSET[strain], strain, len, 1);
			else BLOCKOFFSET[strain] += len;
			hit->startblock = STARTBLOCK[strain];
			hit->blockoffset = BLOCKOFFSET[strain];
		}


		// just perform global alignment if gap heuristic/speedup was disabled:
		if (!OVERHANG_ALIGNMENT) {

			if (kbound_global_alignment(hit)) {
				if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;
				if (insert_into_scorelist(hit, 1)) any_success = 1;
			}

			SEQ[strain][0] = '\0';
			SEQS[strain] = -1;
			continue;
		}



		// perform whole alignment pipeline:

		// Result of kbound_overhang_alignment(): 1 gapless, 0 gaps, -1 too many mismatches.
		// Must be an int: plain char may be unsigned, in which case -1 would be stored
		// as 255 and the "== -1" checks below could never succeed.
		int k1_aligned;

		// ########## FRONT ALIGNMENT ##########

		// if hit.readpos == 2, then first base of read must be mismatch (spares alignment):
		if (hit->readpos == 2) {
			// NOTE(review): this guard compares against NUM_MISMATCHES twice, whereas the
			// corresponding end-of-read check below uses NUM_EDIT_OPS -- confirm which is intended.
			if (hit->mismatches < NUM_MISMATCHES && hit->mismatches - hit->gaps < NUM_MISMATCHES) {
				if (hit->chromosome > 0) {
					hit->length++;
					hit->readpos--;

					// on strain 0 the base is recorded as a mismatch unconditionally;
					// on other strains it is actually compared
					if (strain == 0 ||
						(strain != 0 && (BLOCK_TABLE[STARTBLOCK[strain]].seq[BLOCKOFFSET[strain]] != READ[0] || !unique_base(READ[0]))))
					{
						// create mismatch:
						hit->edit_op[hit->mismatches].pos = 1;
						hit->edit_op[hit->mismatches].mm = 1;
						hit->mismatches++;
					}
				}
				else {
					// locate the block position READ_LENGTH-1 bases to the right of the hit start
					if (hit->blockoffset + READ_LENGTH > strlen(BLOCK_TABLE[hit->startblock].seq))
						get_genome_pos_right(hit->startblock, hit->blockoffset, strain, READ_LENGTH-1, 1);
					else {
						STARTBLOCK[strain] = hit->startblock;
						BLOCKOFFSET[strain] = hit->blockoffset + READ_LENGTH - 1;
					}
					hit->length++;
					hit->readpos--;

					if (strain == 0 ||
						(strain != 0 && (get_compl_base(BLOCK_TABLE[STARTBLOCK[strain]].seq[BLOCKOFFSET[strain]]) != READ[0] || !unique_base(READ[0]))))
					{
						// create mismatch:
						hit->edit_op[hit->mismatches].pos = READ_LENGTH;
						hit->edit_op[hit->mismatches].mm = 1;
						hit->mismatches++;
					}

					STARTBLOCK[strain] = hit->startblock;	// reset because get_genome_pos_right changed it
					BLOCKOFFSET[strain] = hit->blockoffset; // reset because get_genome_pos_right changed it
				}
			}
			else {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}
		}
		// perform front alignment:
		else if (hit->readpos > 2) {

			if (hit->chromosome > 0) {
				offset = strlen(SEQ[strain]) - READ_LENGTH - (NUM_GAPS - SEQS[strain]);
			}
			else {
				offset = NUM_GAPS - SEQS[strain];
			}

			// perform alignment
			k1_aligned = kbound_overhang_alignment(hit, offset, 0);

			// there are gaps on best path in alignment -> perform whole global alignment
			if (k1_aligned == 0) {

				if (kbound_global_alignment(hit)) {
					if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;
					if (insert_into_scorelist(hit, 1)) any_success = 1;
				}
				else { if (DEBUG) { printf("    vorne: after global (0): "); printhit(hit); printf("\n"); } }

				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}

			// too many mismatches in aligned read already:
			if (k1_aligned == -1) {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}

		}



		// ########## END ALIGNMENT ##########

		// if hit ends at pos |read|-1, then last base of read is a mismatch:
		if (hit->length + hit->readpos == READ_LENGTH) {

			// create mismatch:
			if (hit->chromosome > 0) {
				// locate the block position READ_LENGTH-1 bases to the right of the hit start
				if (hit->blockoffset + READ_LENGTH > strlen(BLOCK_TABLE[hit->startblock].seq))
					get_genome_pos_right(hit->startblock, hit->blockoffset, strain, READ_LENGTH-1, 1);
				else {
					STARTBLOCK[strain] = hit->startblock;
					BLOCKOFFSET[strain] = hit->blockoffset + READ_LENGTH - 1;
				}
				hit->length++;

				// only on reference it is sure that last base is mismatch since hit would have been extended
				// on strains hits must not be maximally extended (normally they arent)
				if (strain == 0 ||
					(strain != 0 && (BLOCK_TABLE[STARTBLOCK[strain]].seq[BLOCKOFFSET[strain]] != READ[READ_LENGTH-1] || !unique_base(READ[READ_LENGTH-1]))))
				{
					if (hit->mismatches < NUM_EDIT_OPS && hit->mismatches - hit->gaps < NUM_MISMATCHES) {
						hit->edit_op[hit->mismatches].pos = READ_LENGTH;
						hit->edit_op[hit->mismatches].mm = 1;
						hit->mismatches++;
					}
					else {
						SEQ[strain][0] = '\0';
						SEQS[strain] = -1;
						continue;
					}
				}

				STARTBLOCK[strain] = hit->startblock;	// reset because get_genome_pos_right changed it
				BLOCKOFFSET[strain] = hit->blockoffset; // reset because get_genome_pos_right changed it
			}
			else {
				hit->length++;

				if (strain == 0 ||
					(strain != 0 && (get_compl_base(BLOCK_TABLE[STARTBLOCK[strain]].seq[BLOCKOFFSET[strain]]) != READ[READ_LENGTH-1] || !unique_base(READ[READ_LENGTH-1]))))
				{
					if (hit->mismatches < NUM_EDIT_OPS && hit->mismatches - hit->gaps < NUM_MISMATCHES) {
						hit->edit_op[hit->mismatches].pos = 1;
						hit->edit_op[hit->mismatches].mm = 1;
						hit->mismatches++;
					}
					else {
						SEQ[strain][0] = '\0';
						SEQS[strain] = -1;
						continue;
					}
				}
			}
		}
		// perform end alignment:
		else if (hit->length + hit->readpos < READ_LENGTH) {

			if (hit->chromosome > 0) {
				offset = NUM_GAPS - SEQS[strain];
			}
			else {
				offset = strlen(SEQ[strain]) - READ_LENGTH - (NUM_GAPS - SEQS[strain]);
			}

			// if hit on Ref: perform alignment if at least one edit op can still be afforded:
			// hits on strains must not be maximally extended -> can happen that aligned part is identical with read -> alignment has to be launched since no MMs will be added
			if (strain != 0 || (hit->mismatches < NUM_EDIT_OPS && hit->mismatches - hit->gaps < NUM_MISMATCHES)) {
				k1_aligned = kbound_overhang_alignment(hit, offset, hit->readpos+hit->length-1);
			}
			else {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}

			// there are gaps on best path in alignment -> perform whole global alignment
			if (k1_aligned == 0) {

				if (kbound_global_alignment(hit)) {
					if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;
					if (insert_into_scorelist(hit, 1)) any_success = 1;
				}
				else { if (DEBUG) { printf("    hinten: after global (0): "); printhit(hit); printf("\n"); }}

				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}

			// too many mismatches?
			if (k1_aligned == -1) {
				SEQ[strain][0] = '\0';
				SEQS[strain] = -1;
				continue;
			}
		}

		// gapless end AND front alignment were successful -> update hitlength and insert hit into HITS_BY_SCORE:
		hit->length += READ_LENGTH - hit->length;

		if (insert_into_scorelist(hit, 1)) any_success = 1;

		// unless all hits are wanted, tighten the global edit-op limit to the best hit found so far
		if (!ALL_HIT_STRATEGY && hit->mismatches < NUM_EDIT_OPS) NUM_EDIT_OPS = hit->mismatches;

		SEQ[strain][0] = '\0';
		SEQS[strain] = -1;
		if (!BRANCHES) break;

	} // for all strains


	free(seq);
	free(orihit);

	return any_success;
}




/*
 * Banded (band half-width K = NUM_GAPS) alignment of one overhang of the read,
 * i.e. the part of READ in front of or behind the already matched hit, against
 * the sequence stored in SEQ[hit->strain].
 *
 * hit:       hit being extended. hit->mismatches at entry defines the starting
 *            score (mismatches * MM_SCORE); on a gapless result the mismatches
 *            found here are appended to hit->edit_op and counted.
 * offset:    number of extra cells on the sequence side. Columns j <= offset
 *            (front) or j > length - offset (end) are not filled.
 * readstart: 0 selects the front overhang (read start up to the hit); any
 *            other value selects the end overhang beginning at READ[readstart].
 *
 * The score matrix is the global M, stored band-compressed: the cell for
 * sequence position i and read column j lives in M[i-c][j] with
 * c = (j > K) ? j - K : 0.
 *
 * Returns 1 if the best alignment is gapless, 0 if there are gaps on the best
 * path (nothing is recorded in the hit in that case), and -1 if the number of
 * allowed mismatches is exceeded.
 */
int kbound_overhang_alignment(HIT* hit, int offset, int readstart)
{
	if (STATISTICS) NUM_ALIGNMENTS++;
	
	int K = NUM_GAPS;

	int length;				// extent of the alignment matrix (overhang length plus offset)
	int chrstart;			// index into SEQ[strain] at which the walk over the sequence starts
	char offset_comp = 0;	// shift applied to the READ index; only non-zero for the front overhang
	
	int strain = hit->strain;
	
	if (readstart == 0) {
		// front overhang
		length = hit->readpos + offset - 1;
		offset_comp = offset;
		if (hit->chromosome > 0) chrstart = 0;
			else chrstart = strlen(SEQ[strain]) - 1;	// negative chromosome: sequence is walked backwards from its end
	}
	else {
		// end overhang
		length = READ_LENGTH - readstart + offset;
		if (hit->chromosome > 0) chrstart = strlen(SEQ[strain]) - (READ_LENGTH - readstart + offset);
			else chrstart = length - 1;
	}

	
	int i,j,h;
	
	// Initialization:
	// every path starts with the score the hit has accumulated so far
	unsigned char score_before = hit->mismatches * MM_SCORE;
	if (readstart == 0) {
		// front: the whole band of column "offset" starts at score_before ...
		for (i=0; i!=2*K+1; ++i) {
			M[i][offset] = score_before;
		}
		// ... and the first band row gets increasing gap penalties for up to K-offset further columns
		j = (K-offset < 0)? 0: K-offset;
		for (i=0; i!=j; ++i) {
			M[0][i+1+offset] = score_before + (i+1) * GAP_SCORE;
		}
	}
	else {
		// end: first row and first column carry increasing gap penalties
		for (i = 0; i <= K; ++i) {
			M[i][0] = score_before + i * GAP_SCORE;
			if (i!=0) { 
				M[0][i] = score_before + i * GAP_SCORE;
			}
		}
	}
	
	// Alignment:
	int c, min_i = -1;						// c: band compression shift; min_i: i of the best cell in the final column (end overhang)
	char best_score_on_diag = 0;			// set once the best score was found on the main diagonal (i == j)
	unsigned char score;
	unsigned char best_score = WORST_SCORE + 1;
	unsigned char column_score = best_score;	// minimum over the cells filled for the current i
	
	for (i = 1; i <= length; ++i) {
		
		// visit the cells of the band: j runs from i-K to i+K
		for (h = -K; h <= K; ++h) {
			j = i + h;
		
				if (j <= length && j >= 1) {
					if (j>K) c = j - K;
						else c = 0;
					
				// skip the columns that belong to the offset region
				if ( !(readstart == 0 && j <= offset) && !(readstart != 0 && j > length-offset) ) {
					
					if (STATISTICS) ++CELLS_OVERHANG;
					
					// Score-Function:
					// a read base that is not one of A/C/G/T always counts as a mismatch
					if (hit->chromosome > 0) {
						if (SEQ[strain][chrstart+i-1] != READ[readstart+j-offset_comp-1] || !unique_base(READ[readstart+j-offset_comp-1]))
							 score = MM_SCORE;
						else score = M_SCORE;
					}
					else {
						if (get_compl_base(SEQ[strain][chrstart-i+1]) != READ[readstart+j-offset_comp-1] || !unique_base(READ[readstart+j-offset_comp-1]))
							 score = MM_SCORE;
						else score = M_SCORE;
					}
					
					// diagonal move: predecessor is cell (i-1, j-1)
					M[i-c][j] = M[i-c-(j<=K)][j-1] + score;
					
					// gap move from cell (i-1, j), if that cell lies inside the band
					if ((i-j-1 <= K) && (i-j-1 >= -K)) {
						if (M[i-c-1][j] + GAP_SCORE <= M[i-c][j]) {
							
							// a gap that is at least as good as the diagonal move on the
							// main diagonal means the overhang cannot be aligned gaplessly
							if (i == j) {
								if (STATISTICS) GAPS_ENCOUNTERED[0]++;
								return 0;
							}
							
							M[i-c][j] = M[i-c-1][j] + GAP_SCORE;
							
						}
					}
					// gap move from cell (i, j-1), if that cell lies inside the band
					if ((i-j+1 <= K) && (i-j+1 >= -K)) {
						if (M[i-c+(j>K)][j-1] + GAP_SCORE <= M[i-c][j]) {
							
							if (i == j) {
								if (STATISTICS) GAPS_ENCOUNTERED[0]++;
								return 0;
							}
							
							M[i-c][j] = M[i-c+(j>K)][j-1] + GAP_SCORE;
							
						}
					}
					
					// end overhang only: remember the best cell of the last filled column
					if (readstart != 0 && j == length - offset) {
						if ( (!best_score_on_diag && M[i-c][j] < best_score) || (best_score_on_diag && M[i-c][j] <= best_score) ) {	
							// ensures that if score of most right bottom cell is equal to another cell in the same row, this other cell is reported with the best score
							best_score = M[i-c][j];
							min_i = i;
							if (i == j) best_score_on_diag = 1;
						}
					}
					
					if (M[i-c][j] < column_score) column_score = M[i-c][j];
					
				}
			}
			
		} //for h
		
		// early termination: every cell filled for this i is already worse than WORST_SCORE
		if (column_score > WORST_SCORE) {
			
			// readstart==0: score in most right bottom cell cannot become less than worst score
			// best_score > WORST_SCORE: there is no cell in the bottom row which has a valid alignment score 
			if (readstart == 0 || best_score > WORST_SCORE) {
				if (STATISTICS) TOO_MANY_MMS[0]++;
				return -1;
			}
			
			break;
			
		} else {
			column_score = WORST_SCORE + 1;
		}
		
	} //for i
	
	
	// determine the cell from which the result is read
	if (readstart == 0) {
		// front: always the main-diagonal cell of the last column
		j = length;
		i = K - (j<K) * (K - j);	// band row of the main-diagonal cell: min(K, j)
		best_score = M[i][j];
		min_i = j;
	}
	else {
		// end: the best cell found in the last filled column
		j = length-offset;
		i = min_i - (j>K) * (j - K);
	}
	

	if (best_score > WORST_SCORE) {
		if (STATISTICS) TOO_MANY_MMS[1]++;
		return -1;
	}
	
	// if best score is worse than max nr of allowed mismatches...
	if (best_score > WORST_MM_SCORE) {
		// ...and best score is in the most right bottom cell, there is no better path with gaps -> so we have too many MMs
		if (min_i == j) {
			if (STATISTICS) TOO_MANY_MMS[1]++;
			return -1;
		}
		// ..and there is another cell with this score other than the most right bottom cell -> there is a path with a valid score, BUT with gaps! 
		else {
			if (STATISTICS) GAPS_ENCOUNTERED[2]++;
			return 0;
		}
	}
	
	// FOR READSTART != 0 ONLY: if best path doesn't start from the most right bottom corner of the matrix -> gaps are in best alignment, return 0
	if (min_i != j) {	// even if score in most right bottom cell is equal to best score -> path with gaps needs less edit ops -> perform global algnm.
		if (STATISTICS) GAPS_ENCOUNTERED[2]++;
		return 0;
	}
	// FOR READSTART == 0: traceback has to start in most right bottom corner

	

	// Traceback (if there had been gaps the procedure returned 0, so only mismatches are left -> only diagonal traceback)
	int readpos;	// 1-based read position belonging to column j
	if (readstart == 0) readpos = j - offset;
		else readpos = readstart + j;
	i = K - (j<K) * (K - j);	// band row of the main-diagonal cell in column j

	// number of mismatches added by this alignment
	unsigned mms = (unsigned char) (M[i][j] / MM_SCORE) - (unsigned char) (score_before / MM_SCORE);

	
	// walk back along the main diagonal until all new mismatches are recorded
	// or the first aligned column is reached
	while (j != (readstart==0)*offset && mms != 0) {
		// a score change along the diagonal marks a mismatch at this position
		if (M[i][j] != M[i-(j<=K)][j-1]) {
			// create mismatch:
			
			if (hit->chromosome > 0) hit->edit_op[hit->mismatches].pos = readpos;
				else hit->edit_op[hit->mismatches].pos = READ_LENGTH - readpos + 1;
			hit->edit_op[hit->mismatches].mm = 1;
			hit->mismatches++;
			
			--mms;
		}
		--readpos;
		--j;
		if (j<K) --i;
	}

	// successfully aligned
	return 1;
}






/*
 * k-bound global alignment algorithm:
 * aligns the complete read (READ, READ_LENGTH bases) against the sequence in
 * SEQ[hit->strain] inside a band of half-width K = NUM_GAPS, allowing
 * mismatches and gaps, and records the resulting edit operations in the hit.
 *
 * The scores are kept in the global matrix M and the traceback directions
 * (DIAGONAL, UP, LEFT, with '0' as stop marker) in the global matrix T. Both
 * are stored band-compressed: the cell for sequence position i and read
 * column j lives at [i-c][j] with c = (j > K) ? j - K : 0.
 *
 * hit: hit to align. hit->mismatches is reset to 0 first; on success
 *      mismatches, gaps, edit_op, strainpos, startblock, blockoffset and
 *      length are updated.
 *
 * Returns 1 if an alignment within the allowed limits was found, 0 otherwise
 * (the hit may then be left partially modified).
 */
int kbound_global_alignment(HIT* hit)
{
	if (STATISTICS) NUM_WHOLE_ALIGNMENTS++;
	
	int K = NUM_GAPS;
	
	// delete possible mismatches found in k1_alignment or at the beg/end:
	hit->mismatches = 0;
	
	int strain = hit->strain;
	
	int length = strlen(SEQ[strain]);
	int offset_end, offset_front;	// number of matrix columns excluded behind / in front of the read columns
	int chrstart;					// index into SEQ[strain] at which the walk over the sequence starts
	if (hit->chromosome < 0) {
		// negative chromosome: the sequence is walked backwards from its last base
		offset_front = NUM_GAPS - SEQS[strain];
		offset_end = length - READ_LENGTH - offset_front;
		chrstart = length - 1;
	} else {
		offset_end = NUM_GAPS - SEQS[strain];
		offset_front = length - READ_LENGTH - offset_end;
		chrstart = 0;
	}
	int start_offset = 0;	// shift of the hit start that results from the alignment


	int i,j,h;
	
	// Initialization:
	// column offset_front is the start column: score 0 and stop marker '0'.
	// The inner loop writes the same gap-penalty cells on every outer iteration.
	for (i=0; i!=2*K+1-(K-offset_front); ++i) {
		
		M[i][offset_front] = 0;
		T[i][offset_front] = '0';
		
		j = (K-offset_front < 0)? 0: K-offset_front;
		for (h=0; h!=j; ++h) {
			M[0][h+1+offset_front] = (h+1) * GAP_SCORE;
			T[0][h+1+offset_front] = UP;
		}
	}
	
	
	// Alignment:
	int c;										// band compression shift
	unsigned char best_score = WORST_SCORE + 1;
	unsigned char column_score = best_score;	// minimum over the cells filled for the current i
	unsigned char score;
	int min_i = 0;	// start of traceback
	char best_score_on_diag = 0;				// set once the best score was found on the main diagonal (i == j)
	
	for (i = 1; i <= length; ++i) {
		
		// visit the cells of the band: j runs from i-K to i+K
		for (h = -K; h <= K; ++h) {
			j = i + h;
		
				if (j <= length && j >= 1) {
					if (j>K) c = j - K;
						else c = 0;
					
				// only the columns that correspond to read positions are filled
				if ( j > offset_front && j <= length-offset_end ) {
					
					if (STATISTICS) ++CELLS_GLOBAL;
					
					// Score-Function:
					// a read base that is not one of A/C/G/T always counts as a mismatch
					if (hit->chromosome > 0) {
						if (SEQ[strain][chrstart+i-1] != READ[j-offset_front-1] || !unique_base(READ[j-offset_front-1]))
							 score = MM_SCORE;
						else score = M_SCORE;
						//if (DEBUG) printf("i%d j%d chr[%d]=%c, read[%d]=%c -> score: %.1f\n", i,j,chrstart+i-1, CHR_SEQ[hit->chromosome][chrstart+i-1],	j-offset_front-1, READ[j-offset_front-1], score);	//COMP
					}
					else {
						if (get_compl_base(SEQ[strain][chrstart-i+1]) != READ[j-offset_front-1] || !unique_base(READ[j-offset_front-1]))
							 score = MM_SCORE;
						else score = M_SCORE;
						//if (DEBUG) printf("i%d j%d chr[%d]=%c, read[%d]=%c -> score: %.1f\n", i,j,chrstart-i+1, get_compl_base(CHR_SEQ[hit->chromosome][chrstart-i+1]),	j-offset_front-1, READ[j-offset_front-1], score);	//COMP
					}
					
					// diagonal move: predecessor is cell (i-1, j-1)
					M[i-c][j] = M[i-c-(j<=K)][j-1] + score;
					T[i-c][j] = DIAGONAL;	// traceback diagonally
					
					// move from cell (i, j-1), if that cell lies inside the band.
					// On equal score the gap replaces the diagonal move only for
					// positive chromosome with GAPS_MOST_RIGHT or negative chromosome without it.
					if ((i-j+1 <= K) && (i-j+1 >= -K)) {
						// gap in chr
						if (M[i-c+(j>K)][j-1] + GAP_SCORE == M[i-c][j] && 
								((hit->chromosome > 0 && GAPS_MOST_RIGHT) || (hit->chromosome < 0 && !GAPS_MOST_RIGHT))) {
							M[i-c][j] = M[i-c+(j>K)][j-1] + GAP_SCORE;
							T[i-c][j] = UP;	// traceback to upper with gaps most right
						}
						else if (M[i-c+(j>K)][j-1] + GAP_SCORE < M[i-c][j]) {
							M[i-c][j] = M[i-c+(j>K)][j-1] + GAP_SCORE;
							T[i-c][j] = UP;	// traceback to upper
						}
					}
					
					// move from cell (i-1, j), if that cell lies inside the band; same tie rule as above
					if ((i-j-1 <= K) && (i-j-1 >= -K)) {
						// gap in read
						if (M[i-c-1][j] + GAP_SCORE == M[i-c][j] && 
								((hit->chromosome > 0 && GAPS_MOST_RIGHT) || (hit->chromosome < 0 && !GAPS_MOST_RIGHT))) {
							M[i-c][j] = M[i-c-1][j] + GAP_SCORE;
							T[i-c][j] = LEFT;	// traceback to the left with gaps most right
						}
						else if (M[i-c-1][j] + GAP_SCORE < M[i-c][j]) {
							M[i-c][j] = M[i-c-1][j] + GAP_SCORE;
							T[i-c][j] = LEFT;	// traceback to the left
						}
					}

					// Remember best score, i.e. start of traceback
					// (only cells of the last read column are candidates)
					if (j == length - offset_end) {
						
						// gaps in reads preferred to gaps in chr:
						if ( (i <= j && M[i-c][j] <= best_score) || (i > j && ((best_score_on_diag && M[i-c][j] <  best_score) 
																		   || (!best_score_on_diag && M[i-c][j] <= best_score))) ) {
						
							best_score = M[i-c][j];
							min_i = i;
							if (i == j) best_score_on_diag = 1;
						}
					}
					// best_score preference:
					//	1. diagonal
					//	2. i > j (gaps in reads)
					//	3. i < j (gaps in chr)
					
					if (M[i-c][j] < column_score) column_score = M[i-c][j];
						
				}
			}
			
		} //for h
		
		// early termination: every cell filled for this i is already worse than WORST_SCORE
		if (column_score > WORST_SCORE) {
			if (best_score > WORST_SCORE) {
				// no valid end cell found so far either -> give up
				if (STATISTICS) BREAK_GLOBAL_ALIGNMENT[0]++;
				return 0;
			}
			else {
				// a valid end cell exists -> stop filling and trace back from it
				if (STATISTICS) BREAK_GLOBAL_ALIGNMENT[1]++;
				break;
			}
		} else {
			column_score = WORST_SCORE + 1;
		}
		
	} //for i
	
	
	if (best_score > WORST_SCORE) {
                if (STATISTICS) BREAK_GLOBAL_ALIGNMENT[0]++;
        	return 0;
	}


	// position the traceback on the best cell of the last read column
	j = length - offset_end;
	i = min_i - (j>K)*(j-K);	// band row of that cell
	int chrpos;					// index into SEQ[strain] that accompanies the traceback
	if (hit->chromosome > 0) {
		chrpos = chrstart + min_i - 1;
		hit->length += min_i - j;
	}
	else {
		chrpos = chrstart - min_i + 1;
		start_offset -= min_i - j;
	}
	int readpos = READ_LENGTH - 1;	// 0-based read index that accompanies the traceback
	
	
	
	
	// Traceback:
	// follows T until a stop marker or a cell with score 0 is reached and
	// records one edit operation per mismatch or gap; exceeding a limit discards the hit
	
	while (T[i][j] != '0' && M[i][j] != 0) {
			
		switch (T[i][j]) {		
			case DIAGONAL: {
				
				
				//if (CHR_SEQ[hit->chromosome][chrpos] != READ[readpos] || !unique_base(READ[readpos])) {
				//if (M[i-(j<=K)][j-1] == M[i][j] - MM_SCORE) {				// doesn't work for doubles!!
				//if (fabs(M[i-(j<=K)][j-1] - M[i][j] + MM_SCORE) > 1E-9) {		// works, but requires math.h!
				if (M[i-(j<=K)][j-1] != M[i][j]) {					// requires that MM_SCORE != M_SCORE != GAP_SCORE
					// a score change along the diagonal marks a mismatch
					if ((hit->mismatches-hit->gaps) < NUM_MISMATCHES && hit->mismatches < NUM_EDIT_OPS) {
						if (hit->chromosome > 0) hit->edit_op[hit->mismatches].pos = readpos + 1;
							else					 hit->edit_op[hit->mismatches].pos = READ_LENGTH - readpos;
						hit->edit_op[hit->mismatches].mm = 1;
						hit->mismatches++; 
					}
					else {
						if (STATISTICS) BREAK_TB_IN_GLOBAL_ALIGNMENT++;
						return 0;	// discard hit
					}
				}
				
				i = i-(j<=K);
				j--;
				if (hit->chromosome > 0) chrpos--;
					else chrpos++;
				readpos--;
				break;
			}
			
			case LEFT: {
				// consumes a sequence base without a read base; stored as edit op with mm = 0 and positive pos
		
				if (hit->mismatches < NUM_EDIT_OPS && (!STRINGENT_GAPLIMIT || hit->gaps < NUM_GAPS)) {
					if (STATISTICS && hit->gaps >= NUM_GAPS) W++;
					if (hit->chromosome > 0) hit->edit_op[hit->mismatches].pos = readpos + 2;
						else		 hit->edit_op[hit->mismatches].pos = READ_LENGTH - readpos;
					hit->edit_op[hit->mismatches].mm = 0;
					hit->mismatches++;
					hit->gaps++;
				}
				else {
					if (STATISTICS) BREAK_TB_IN_GLOBAL_ALIGNMENT++;
					return 0;	// discard hit
				}
					
				i--;
				if (hit->chromosome > 0) chrpos--;
					else chrpos++;
				break;
			}
			
			case UP: {
				// consumes a read base without a sequence base; stored as edit op with mm = 0 and negative pos
				
				if (hit->mismatches < NUM_EDIT_OPS && (!STRINGENT_GAPLIMIT || hit->gaps < NUM_GAPS)) {
					if (STATISTICS && hit->gaps >= NUM_GAPS) W++;
					if (hit->chromosome > 0) hit->edit_op[hit->mismatches].pos = -readpos - 1;
						else		 hit->edit_op[hit->mismatches].pos = -READ_LENGTH + readpos; 
					hit->edit_op[hit->mismatches].mm = 0;
					hit->mismatches++;
					hit->gaps++;
				}
				else {
					if (STATISTICS) BREAK_TB_IN_GLOBAL_ALIGNMENT++;
					return 0;	// discard hit
				}
				
				i = i+(j>K);
				j--;
				readpos--;
				break;
			}
		}
		
	}
	
	// i + (j>K)*(j-K) converts the band row back to the sequence position; its
	// difference to j is the shift between sequence and read where the traceback ended
	if (hit->chromosome > 0) start_offset += i + (j>K)*(j-K) - j;
	else hit->length -= i + (j>K)*(j-K) - j;
	
	// updating hit start:
	// block boundaries are crossed with get_genome_pos_right()/get_genome_pos_left(),
	// otherwise the offset inside the current block is adjusted directly
	if (start_offset > 0) {
		if (hit->blockoffset + start_offset >= strlen(BLOCK_TABLE[hit->startblock].seq)) get_genome_pos_right(hit->startblock, hit->blockoffset, hit->strain, start_offset, 1);
		else {
			STARTBLOCK[hit->strain] = hit->startblock;
			BLOCKOFFSET[hit->strain] = hit->blockoffset + start_offset;
		}
	}
	else if (start_offset < 0) {
		if (-start_offset > hit->blockoffset) get_genome_pos_left(hit->startblock, hit->blockoffset, hit->strain, -start_offset, 1);
		else {
			STARTBLOCK[hit->strain] = hit->startblock;
			BLOCKOFFSET[hit->strain] = hit->blockoffset + start_offset;
		}
	}

	hit->strainpos = hit->strainpos + start_offset;
	hit->startblock = STARTBLOCK[hit->strain];
	hit->blockoffset = BLOCKOFFSET[hit->strain];
	
	// updating hit length:
	// (net effect: hit->length becomes READ_LENGTH, overriding the adjustments made above)
	hit->length += READ_LENGTH - hit->length;
	
	// successfully aligned
	return 1;
}

