// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008/09 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "genomemapper.h"

// Forward declarations of the loader stages defined further down in this
// file; build_index() calls them in the order listed here.
int read_meta_index_header();
int read_meta_index();
int read_index();

/*
 * Load the genome index by running the loader stages in a fixed order:
 *
 *   1. read_meta_index_header()  - header of the meta index file
 *   2. alloc_index_memory()      - memory for the index
 *   3. read_meta_index()         - entries of the meta index file
 *   4. init_from_meta_index()    - initialisation from the meta information
 *   5. read_index()              - the index file itself
 *
 * The results of the individual stages are not inspected here.
 *
 * Returns 0.
 */
int build_index() 
{
	/* stage 1-3: meta information */
	(void) read_meta_index_header();
	(void) alloc_index_memory();
	(void) read_meta_index();

	/* stage 4: set up state derived from the meta information */
	(void) init_from_meta_index();

	/* stage 5: index information */
	(void) read_index();

	return 0;
}

int read_meta_index_header() 
{
	int i;
	if (VERBOSE) { printf("Reading in meta index\n"); }

	if (fread(&REV_IDX_EXISTS, sizeof(char), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}
	
	if (!REV_IDX_EXISTS && MAP_REVERSE) 
		fprintf(stderr, "\n!!! WARNING: Index file doesn't contain reverse index: mapping to reverse strand cannot be done!\n\n");
	
	if (fread(&INDEX_DEPTH, sizeof(int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}
	if (VERBOSE) { printf("\tIndex depth is %d\n", INDEX_DEPTH); }
	
	if (HITLEN_LIMIT == 0) HITLEN_LIMIT = INDEX_DEPTH;
	else if (HITLEN_LIMIT < INDEX_DEPTH) {
		fprintf(stderr, "\n!!! WARNING: Hitlength limit is smaller than seedlength, it will be set to seedlength!\n\n");
		HITLEN_LIMIT = INDEX_DEPTH;
	}	
		
	if (fread(&NUM_CHROMOSOMES, sizeof(int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}
	if (VERBOSE) { printf("\tNb of chromosomes is %d\n", NUM_CHROMOSOMES); }
	
	// alloc space for chomosome lengths
	if ((CHR_LENGTH = (unsigned int *) malloc (NUM_CHROMOSOMES * sizeof(unsigned int*))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome memory\n");
		exit(1);
	}
	// and descriptions
	if ((CHR_DESC = (char**) malloc (NUM_CHROMOSOMES * sizeof(char**))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome description\n");
		exit(1);
	}
	

	if (fread(&NUM_POS, sizeof(int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}

	if (fread(&LONGEST_CHROMOSOME, sizeof(int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}

	// control:
	/*if (INDEX_DEPTH > 4 && INDEX_DEPTH < 14) {
		printf("Index depth not between 5 and 13\nBroken meta index file?\n");
		exit(1);
	}*/
	
	// read strain information
	if (fread(&NUM_STRAINS, sizeof(unsigned int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}
	STRAIN_DESC = (char **) malloc((NUM_STRAINS) * sizeof(char *));
	SEQ = (char **) malloc((NUM_STRAINS+1) * sizeof(char *));
	SEQS = (int *) malloc((NUM_STRAINS+1) * sizeof(int));
	for (i=0; i!=NUM_STRAINS; ++i) {
		STRAIN_DESC[i] = (char *) malloc(100 * sizeof(char));
		if (fread(&STRAIN_DESC[i][0], sizeof(char), 100, META_INDEX_FP) == 0) {
			fprintf(stderr, "ERROR: cant read meta index file\n");
			exit(0);
		}
		
		SEQ[i] = (char *) malloc(MAX_READ_LENGTH * sizeof(char));
		SEQS[i] = -1;
	}
	SEQ[NUM_STRAINS] = (char *) malloc(MAX_READ_LENGTH * sizeof(char));
	SEQS[NUM_STRAINS] = -1;
	STARTBLOCK 	= (unsigned int *) malloc((NUM_STRAINS + 1) * sizeof(unsigned int));
	BLOCKOFFSET = (unsigned int *) malloc((NUM_STRAINS + 1) * sizeof(unsigned int));
	STRAINPOS   = (int *) malloc((NUM_STRAINS + 1) * sizeof(int));
	
	// read block table:
	if (fread(&NUM_BLOCKS, sizeof(unsigned int), 1, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(0);
	}
	
	BLOCK_TABLE = (BLOCK_TABLE_ENTRY *) malloc (NUM_BLOCKS * sizeof(BLOCK_TABLE_ENTRY));
	
	if (fread(BLOCK_TABLE, sizeof(BLOCK_TABLE_ENTRY), NUM_BLOCKS, META_INDEX_FP) == 0) {
		fprintf(stderr, "ERROR: cant read meta index file (blocktable)\n");
		exit(0);
	}
	
	return(0);
}

/*
 * Read all META_INDEX_ENTRY records that follow the header in the meta index
 * file and close the file afterwards.
 *
 * For every record the entry count is stored in the slot's index entry and
 * the slot is given the current position of the memory manager
 * (MEM_MGR.next_unused_entry), which is then advanced by that count.
 * MAX_POSITIONS tracks the largest count seen.
 *
 * Records with a non-negative slot go to INDEX. Records with a negative slot
 * go to INDEX_REV, but only when MAP_REVERSE is set; otherwise they are
 * ignored. The reverse slot number is the negated value, with -2147483647
 * standing for reverse slot 0.
 *
 * A read error on the stream (as opposed to reaching the end of the file)
 * prints a message to stderr and terminates with exit status 1.
 *
 * Returns 0.
 */
int read_meta_index() 
{
	/* on-disk marker for "reverse entry of slot 0" (0 cannot be negated) */
	const int rev_slot_zero = -2147483647;

 	META_INDEX_ENTRY file_entry;
	int used_slots = 0;
	int old_slot = INDEX_SIZE+1;
	int slot_rev;

	while (fread(&file_entry, sizeof(META_INDEX_ENTRY), 1, META_INDEX_FP) == 1) {

		// debug statistic: an entry is not counted when its negated slot
		// number equals the slot of the entry read just before it
		if (old_slot != -file_entry.slot) used_slots++;

		if (file_entry.slot >= 0) {

			// forward index
			INDEX[file_entry.slot].num = file_entry.num;
			INDEX[file_entry.slot].last_entry = MEM_MGR.next_unused_entry;

			MEM_MGR.next_unused_entry += file_entry.num;
			MEM_MGR.num_bins += file_entry.num;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}
		else if (MAP_REVERSE) {

			// reverse index
			if (file_entry.slot == rev_slot_zero) slot_rev = 0;
			else slot_rev = -file_entry.slot;

			INDEX_REV[slot_rev].num = file_entry.num;
			INDEX_REV[slot_rev].last_entry = MEM_MGR.next_unused_entry;

			MEM_MGR.next_unused_entry += file_entry.num;
			MEM_MGR.num_bins += file_entry.num;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}

		old_slot = file_entry.slot;

	}

	// the loop also ends on an I/O error; do not mistake that for end of file
	if (ferror(META_INDEX_FP)) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(1);
	}

	fclose(META_INDEX_FP);

	if (DEBUG) { printf("\tNb of used slots is %d\nFinished parsing meta index\n", used_slots); }

  	return 0;
}

/*
 * Read exactly `count` items of `size` bytes from INDEX_FP into `dst`.
 * On a short read report the truncated file on stderr, tagged with `stage`
 * to identify the failing read, and terminate with exit status 1.
 */
static void index_read_or_die(void *dst, size_t size, size_t count, const char *stage)
{
	if (fread(dst, size, count, INDEX_FP) != count) {
		fprintf(stderr, "Early stop in index file (%s).\nCorrupted file?\n", stage);
		exit(1);
	}
}

/*
 * Read the index file (INDEX_FP) and close it afterwards.
 *
 * The file holds one section per chromosome (NUM_CHROMOSOMES in total):
 * chromosome number, chromosome length, a description field of
 * CHR_DESC_LENGTH bytes and the number of slots, followed by the slot
 * entries. Each slot entry consists of the slot number, the number of
 * storage entries and the storage entries themselves, which are read to the
 * slot's `last_entry` position; `last_entry` is advanced past them.
 *
 * Negative slot numbers belong to the reverse index (-2147483647 stands for
 * reverse slot 0). They are loaded into INDEX_REV when MAP_REVERSE is set
 * and skipped in the file otherwise.
 *
 * A truncated file, an out-of-range chromosome number or a failed
 * allocation terminates the program with exit status 1.
 *
 * NOTE(review): slot numbers and entry counts from the file are trusted
 * here; presumably they agree with what read_meta_index() set up - confirm.
 *
 * Returns 0.
 */
int read_index() 
{
	unsigned int chr_num;
	unsigned int chr;
	unsigned int chrlen; 
	unsigned int chr_slot_num;
	unsigned int slots_in_file;
	int slot;
	unsigned int slot_entry_num;
	unsigned int i;
	char chr_desc[CHR_DESC_LENGTH];

	if (VERBOSE) { printf("Reading in index\n"); }

	for (chr_num = 0; chr_num != NUM_CHROMOSOMES; chr_num++) {

		//HEADER OF CHROMOSOME ENTRY

		//chromosome
		index_read_or_die(&chr, sizeof(unsigned int), 1, "1");
		if (VERBOSE) { printf("\tchromosome ID is %u, ", chr+1); }

		// chr indexes CHR_LENGTH and CHR_DESC, which hold NUM_CHROMOSOMES elements
		if (chr >= (unsigned int) NUM_CHROMOSOMES) {
			fprintf(stderr, "ERROR: chromosome ID %u in index file is out of range.\nCorrupted file?\n", chr+1);
			exit(1);
		}

		//chromosome length
		index_read_or_die(&chrlen, sizeof(unsigned int), 1, "2");
		if (DEBUG) { printf("length %u\t", chrlen); }
		CHR_LENGTH[chr] = chrlen;

		//chromosome description
		index_read_or_die(chr_desc, sizeof(char), CHR_DESC_LENGTH, "3");
		// the field may be filled completely; make sure it is a valid string
		chr_desc[CHR_DESC_LENGTH - 1] = '\0';

		if ((*(CHR_DESC+chr) = (char*) malloc (CHR_DESC_LENGTH * sizeof(char))) == NULL) {
			fprintf(stderr, "ERROR : not enough memory for genome description\n");
			exit(1);
		}
		strcpy(CHR_DESC[chr], chr_desc);
		if (VERBOSE) { printf("description is %s\n", CHR_DESC[chr]); }

		//number of slots for this chromosome
		index_read_or_die(&chr_slot_num, sizeof(unsigned int), 1, "4");

		// with a reverse index present every slot is stored twice
		slots_in_file = chr_slot_num * (1 + REV_IDX_EXISTS);

		for (i = 0; i < slots_in_file; i++) {

			//HEADER OF SLOT ENTRY
			index_read_or_die(&slot, sizeof(int), 1, "4");
			index_read_or_die(&slot_entry_num, sizeof(int), 1, "5");

			if (slot >= 0) {

				index_read_or_die((INDEX+slot)->last_entry, sizeof(STORAGE_ENTRY), slot_entry_num, "6");
				(INDEX+slot)->last_entry += slot_entry_num;

			}
			else if (MAP_REVERSE) {

				slot = (slot == -2147483647)? 0: -slot;

				index_read_or_die((INDEX_REV+slot)->last_entry, sizeof(STORAGE_ENTRY), slot_entry_num, "6r");
				(INDEX_REV+slot)->last_entry += slot_entry_num;

			}
			else {

				// reverse entries are not needed: step over them
				if (fseek(INDEX_FP, (long) (sizeof(STORAGE_ENTRY) * slot_entry_num), SEEK_CUR) != 0) {
					fprintf(stderr, "Early stop in index file (6s).\nCorrupted file?\n");
					exit(1);
				}

			}

		} //for every slot entry of the chromosome

	} // for every chromosome

	fclose(INDEX_FP);

	if (VERBOSE) { printf("Finished parsing index\n"); }

  	return(0);
}


/*
 * Translate the strain part of a sequence code into a base character.
 *
 * Values above 64 are returned unchanged. Otherwise the lowest three bits
 * select the result: 1 -> 'A', 3 -> 'C', 7 -> 'G', 4 -> 'T', 6 -> 'N',
 * 0 -> '-'. The remaining bit patterns (2 and 5) yield '\0'.
 */
char get_strain_base(char c)
{
	/* indexed by the three strain bits */
	static const char strain_letter[8] = { '-', 'A', '\0', 'C', 'T', '\0', 'N', 'G' };

	return (c > 64) ? c : strain_letter[c & 7];
}

/*
 * Translate the reference part of a sequence code into a base character.
 *
 * Values above 64 are returned unchanged. Otherwise bits 3-5 (mask 56)
 * select the result: 8 -> 'A', 24 -> 'C', 56 -> 'G', 32 -> 'T', 48 -> 'N',
 * 0 -> '-'. The remaining bit patterns (16 and 40) yield '\0'.
 */
char get_ref_base(char c)
{
	/* indexed by the three reference bits, shifted down to 0..7 */
	static const char ref_letter[8] = { '-', 'A', '\0', 'C', 'T', '\0', 'N', 'G' };

	return (c > 64) ? c : ref_letter[(c & 56) >> 3];
}

/*
 * Text for one sequence code of at most 64. Codes where reference and
 * strain agree map to a single letter, differing ones to "(<ref><strain>)"
 * with '-' marking a missing base. Unknown codes map to the empty string.
 */
static const char *decode_seq_code_text(char code)
{
	switch (code) {
		case 8:		return "(A-)";
		case 9:		return "A";
		case 11:	return "(AC)";
		case 15:	return "(AG)";
		case 12:	return "(AT)";
		case 14:	return "(AN)";
		case 24:	return "(C-)";
		case 25:	return "(CA)";
		case 27:	return "C";
		case 31:	return "(CG)";
		case 28:	return "(CT)";
		case 30:	return "(CN)";
		case 56:	return "(G-)";
		case 57:	return "(GA)";
		case 59:	return "(GC)";
		case 62:	return "(GN)";
		case 63:	return "G";
		case 60:	return "(GT)";
		case 32:	return "(T-)";
		case 33:	return "(TA)";
		case 35:	return "(TC)";
		case 39:	return "(TG)";
		case 38:	return "(TN)";
		case 36:	return "T";
		case 1:		return "(-A)";
		case 3:		return "(-C)";
		case 7:		return "(-G)";
		case 4:		return "(-T)";
		case 6:		return "(-N)";
		case 45:	return "-";
		case 48:	return "(N-)";
		case 49:	return "(NA)";
		case 51:	return "(NC)";
		case 55:	return "(NG)";
		case 52:	return "(NT)";
		case 54:	return "N";
		default:	return "";
	}
}

/*
 * Decode the encoded sequence `template` into readable text in `seq`.
 *
 * Characters above 64 are copied verbatim; every other character is
 * expanded through the code table, unknown codes produce no output.
 * A single input character expands to at most four output characters, so
 * `seq` must offer room for 4 * strlen(template) + 1 bytes.
 *
 * Runs in a single pass over the input.
 *
 * Returns `seq`.
 */
char *decode_seq(char *seq, char* template)
{
	char *out = seq;
	const char *in;

	seq[0] = '\0';
	for (in = template; *in != '\0'; ++in) {
		if (*in > 64) {
			*out++ = *in;
		}
		else {
			const char *text = decode_seq_code_text(*in);
			while (*text != '\0') *out++ = *text++;
		}
	}
	*out = '\0';

	return seq;
}

/*
 * Complemented text for one sequence code, or NULL when the code has no
 * entry in the table. Plain letters A/C/G/T are complemented as well.
 * Codes involving 'N' keep the 'N' and complement the partner base.
 */
static const char *decode_seq_code_text_rc(char code)
{
	switch (code) {
		case 65:	return "T";
		case 67:	return "G";
		case 71:	return "C";
		case 84:	return "A";
		case 8:		return "(T-)";
		case 9:		return "T";
		case 11:	return "(TG)";
		case 15:	return "(TC)";
		case 12:	return "(TA)";
		case 14:	return "(TN)";
		case 24:	return "(G-)";
		case 25:	return "(GT)";
		case 27:	return "G";
		case 31:	return "(GC)";
		case 28:	return "(GA)";
		case 30:	return "(GN)";
		case 56:	return "(C-)";
		case 57:	return "(CT)";
		case 59:	return "(CG)";
		case 62:	return "(CN)";
		case 63:	return "C";
		case 60:	return "(CA)";
		case 32:	return "(A-)";
		case 33:	return "(AT)";
		case 35:	return "(AG)";
		case 39:	return "(AC)";
		case 38:	return "(AN)";
		case 36:	return "A";
		case 1:		return "(-T)";
		case 3:		return "(-G)";
		case 7:		return "(-C)";
		case 4:		return "(-A)";
		case 6:		return "(-N)";
		case 45:	return "-";
		case 48:	return "(N-)";
		case 49:	return "(NT)";
		case 51:	return "(NG)";
		case 55:	return "(NC)";
		case 52:	return "(NA)";
		case 54:	return "N";
		default:	return NULL;
	}
}

/*
 * Decode the encoded sequence `template` into readable text in `seq`,
 * reverse-complemented: the input is walked from its last character to its
 * first and every base is replaced by its complement. Within a
 * "(<ref><strain>)" pair the order of the two bases is kept.
 *
 * Characters above 64 other than A/C/G/T are copied verbatim; codes without
 * a table entry produce no output. A single input character expands to at
 * most four output characters, so `seq` must offer room for
 * 4 * strlen(template) + 1 bytes.
 *
 * Returns `seq`.
 */
char *decode_seq_reverse(char *seq, char* template)
{
	char *out = seq;
	size_t pos;

	seq[0] = '\0';
	pos = strlen(template);
	while (pos-- > 0) {
		char code = template[pos];
		const char *text = decode_seq_code_text_rc(code);

		if (text != NULL) {
			while (*text != '\0') *out++ = *text++;
		}
		else if (code > 64) {
			*out++ = code;
		}
	}
	*out = '\0';

	return seq;
}


/*
 * Decode only the strain bases of the encoded sequence `template` into
 * `seq`.
 *
 * Characters above 64 are copied verbatim. For every other character the
 * lowest three bits select the base: 1 -> 'A', 3 -> 'C', 7 -> 'G',
 * 4 -> 'T', 6 -> 'N'; all other bit patterns produce no output.
 * The result is never longer than the input, so `seq` needs room for
 * strlen(template) + 1 bytes.
 *
 * Runs in a single pass over the input.
 *
 * Returns `seq`.
 */
char *decode_strainseq(char *seq, char* template)
{
	/* indexed by the three strain bits; '\0' marks "emit nothing" */
	static const char strain_letter[8] = { '\0', 'A', '\0', 'C', 'T', '\0', 'N', 'G' };

	char *out = seq;
	const char *in;

	for (in = template; *in != '\0'; ++in) {
		char base = (*in > 64) ? *in : strain_letter[*in & 7];

		if (base != '\0') *out++ = base;
	}
	*out = '\0';

	return seq;
}


/*
 * Debugging aid: print entries 1..lim (inclusive) of BLOCK_TABLE to stdout,
 * one line per block, with the block's sequence decoded by decode_seq().
 *
 * The decode buffer holds BLOCK_SIZE * 5 bytes; decode_seq() writes up to
 * four characters per input character plus the terminator.
 * NOTE(review): this assumes a block sequence has at most BLOCK_SIZE
 * characters - confirm against the BLOCK_TABLE_ENTRY definition.
 *
 * If the buffer cannot be allocated a message is printed to stderr and
 * nothing else is printed.
 */
void print_blocktable(int lim)
{
	char *seq = malloc(BLOCK_SIZE * 5 * sizeof(char));
	int i;

	if (seq == NULL) {
		fprintf(stderr, "ERROR : not enough memory for printing the block table\n");
		return;
	}

	// print block table (debugging)
	printf("-----------------------------------------------------------------\n");
	printf("block|posR|off|ins|posS|chr|st|prev|next|fbns|lbns| len | seq\n");
	for (i=1; i<=lim; ++i) {
		printf("%5d|%4d|%3d|%3d|%4d|%3d|%2d|%4d|%4d|%4d|%4d|%5d|.%s.\n",
				i, BLOCK_TABLE[i].pos, BLOCK_TABLE[i].indel_offset, BLOCK_TABLE[i].ins_pos, BLOCK_TABLE[i].strainpos, BLOCK_TABLE[i].chr+1, BLOCK_TABLE[i].strain, BLOCK_TABLE[i].prev_block, BLOCK_TABLE[i].next_block, 
				BLOCK_TABLE[i].next_strain_front, BLOCK_TABLE[i].next_strain_end, (int)strlen(BLOCK_TABLE[i].seq), decode_seq(seq, BLOCK_TABLE[i].seq));
	}
	printf("-----------------------------------------------------------------\n");
	free(seq);
}
