// Authors: Korbinian Schneeberger, Stephan Ossowski and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "genomemapper.h"

// Forward declarations of the index loading steps defined further down in
// this file; build_index() calls them in this order (with allocation and
// initialisation steps in between). Each returns 0 when it completes.
int read_meta_index_header();
int read_meta_index();
int read_index();

/*
 * Entry point for loading the index.
 *
 * Runs the loading steps in a fixed sequence:
 *   1. read_meta_index_header()
 *   2. alloc_index_memory()
 *   3. read_meta_index()
 *   4. init_from_meta_index()
 *   5. read_index()
 *
 * The results of the individual steps are not inspected.
 *
 * Returns: always 0.
 */
int build_index()
{
	/* meta information: header, storage, then the entries themselves */
	read_meta_index_header();
	alloc_index_memory();
	read_meta_index();

	/* set-up that relies on the meta information read above */
	init_from_meta_index();

	/* finally the index information itself */
	read_index();

	return 0;
}

/*
 * Reads `count` items of `size` bytes from META_INDEX_FP into `dest`.
 * Terminates the program with an error message naming `what` if fewer
 * items than requested could be read (truncated file or I/O error).
 */
static void read_meta_header_field(void *dest, size_t size, size_t count, const char *what)
{
	if (fread(dest, size, count, META_INDEX_FP) != count) {
		fprintf(stderr, "ERROR : could not read %s from meta index file. Corrupted file?\n", what);
		exit(1);
	}
}

/*
 * Reads the header of the meta index file from META_INDEX_FP.
 *
 * Fields are read in this order and stored in the corresponding globals:
 *   - REV_IDX_EXISTS      (1 char)
 *   - INDEX_DEPTH         (int)
 *   - NUM_CHROMOSOMES     (int)
 *   - NUM_POS             (int)
 *   - LONGEST_CHROMOSOME  (int)
 *   - number of blocks    (unsigned int), followed by that many POS records
 *     which are stored in the newly allocated BLOCK_TABLE
 *
 * Side effects besides filling the globals above:
 *   - HITLEN_LIMIT is set to INDEX_DEPTH when it is 0 or smaller than
 *     INDEX_DEPTH (the latter case prints a warning).
 *   - CHR_LENGTH and CHR_DESC are allocated with NUM_CHROMOSOMES elements;
 *     their contents are not initialised here.
 *
 * The program exits with status 1 if a field cannot be read or an
 * allocation fails.
 *
 * Returns: always 0.
 */
int read_meta_index_header() 
{
	unsigned int blocks;

	if (VERBOSE) { printf("Reading in meta index\n"); }

	read_meta_header_field(&REV_IDX_EXISTS, sizeof(char), 1, "reverse index flag");
	if (DEBUG) { printf("\tReverse index exists? %d\n", REV_IDX_EXISTS); }

	if (!REV_IDX_EXISTS && MAP_REVERSE) {
		fprintf(stderr, "\n!!! WARNING: Index file doesn't contain reverse index: mapping to reverse strand cannot be done!\n\n");
	}

	read_meta_header_field(&INDEX_DEPTH, sizeof(int), 1, "index depth");
	if (VERBOSE) { printf("\tIndex depth is %d\n", INDEX_DEPTH); }

	// the hit length limit can never be smaller than the index depth
	if (HITLEN_LIMIT == 0) {
		HITLEN_LIMIT = INDEX_DEPTH;
	}
	else if (HITLEN_LIMIT < INDEX_DEPTH) {
		fprintf(stderr, "\n!!! WARNING: Hitlength limit is smaller than seedlength, it will be set to seedlength!\n\n");
		HITLEN_LIMIT = INDEX_DEPTH;
	}

	read_meta_header_field(&NUM_CHROMOSOMES, sizeof(int), 1, "number of chromosomes");
	if (VERBOSE) { printf("\tNb of chromosomes is %d\n", NUM_CHROMOSOMES); }

	// alloc space for chromosome lengths (one unsigned int per chromosome)
	if ((CHR_LENGTH = (unsigned int *) malloc (NUM_CHROMOSOMES * sizeof(unsigned int))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome memory\n");
		exit(1);
	}
	// and descriptions (one string pointer per chromosome)
	if ((CHR_DESC = (char**) malloc (NUM_CHROMOSOMES * sizeof(char*))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome description\n");
		exit(1);
	}

	read_meta_header_field(&NUM_POS, sizeof(int), 1, "number of positions");
	if (DEBUG) { printf("\tNb of positions in index is %d\n", NUM_POS); }

	read_meta_header_field(&LONGEST_CHROMOSOME, sizeof(int), 1, "length of longest chromosome");
	if (DEBUG) { printf("\tLength of longest chromosome is %d\n", LONGEST_CHROMOSOME); }

	// read block table: a count followed by that many POS records
	read_meta_header_field(&blocks, sizeof(unsigned int), 1, "number of blocks");

	BLOCK_TABLE = (POS *) malloc (blocks * sizeof(POS));
	// malloc(0) may legitimately return NULL, so only a non-empty table counts as a failure
	if (BLOCK_TABLE == NULL && blocks != 0) {
		fprintf(stderr, "ERROR : not enough memory for block table\n");
		exit(1);
	}

	if (blocks != 0) {
		read_meta_header_field(BLOCK_TABLE, sizeof(POS), blocks, "block table");
	}

	return(0);
}

/*
 * Reads all META_INDEX_ENTRY records that follow the header in
 * META_INDEX_FP and closes the file afterwards.
 *
 * For every record:
 *   - slot >= 0: the entry describes INDEX[slot].
 *   - slot <  0: the entry describes INDEX_REV[-slot]; the value
 *     -2147483647 stands for reverse slot 0. Such records are only applied
 *     when MAP_REVERSE is set and are ignored otherwise.
 *
 * An applied record stores its `num` in the index entry, points the entry's
 * `last_entry` at MEM_MGR.next_unused_entry, advances
 * MEM_MGR.next_unused_entry and MEM_MGR.num_bins by `num`, and raises
 * MAX_POSITIONS to `num` if it is larger.
 *
 * The program exits with status 1 if reading stops because of an I/O error
 * rather than end of file.
 *
 * NOTE(review): slot values are not range-checked against the size of
 * INDEX / INDEX_REV, whose allocation is not visible here - confirm.
 *
 * Returns: always 0.
 */
int read_meta_index() 
{
	// stored slot value that denotes reverse slot 0 (see mapping below)
	const int rev_slot_zero = -2147483647;

	META_INDEX_ENTRY file_entry;
	int used_slots = 0;
	int old_slot = INDEX_SIZE+1;
	int mirrored_slot;
	int slot_rev;

	while (fread(&file_entry, sizeof(META_INDEX_ENTRY), 1, META_INDEX_FP) == 1) {

		// A forward record directly followed by its reverse counterpart is one
		// used slot. The counterpart of slot 0 is the sentinel, not -0, so it
		// has to be mapped explicitly or slot 0 would be counted twice.
		mirrored_slot = (file_entry.slot == rev_slot_zero) ? 0 : -file_entry.slot;
		if (old_slot != mirrored_slot) used_slots++;

		if (file_entry.slot >= 0) {

			INDEX[file_entry.slot].num = file_entry.num;
			INDEX[file_entry.slot].last_entry = MEM_MGR.next_unused_entry;

			MEM_MGR.next_unused_entry += file_entry.num;
			MEM_MGR.num_bins += file_entry.num;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}
		else if (MAP_REVERSE) {

			slot_rev = (file_entry.slot == rev_slot_zero) ? 0 : -file_entry.slot;

			INDEX_REV[slot_rev].num = file_entry.num;
			INDEX_REV[slot_rev].last_entry = MEM_MGR.next_unused_entry;

			MEM_MGR.next_unused_entry += file_entry.num;
			MEM_MGR.num_bins += file_entry.num;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}

		old_slot = file_entry.slot;

	}

	// fread() returning 0 means either end of file or a read error; only the
	// former is a normal way to leave the loop
	if (ferror(META_INDEX_FP)) {
		fprintf(stderr, "ERROR : could not read meta index file\n");
		exit(1);
	}

	fclose(META_INDEX_FP);

	if (DEBUG) { printf("\tNb of used slots is %d\nFinished parsing meta index\n", used_slots); }

	return 0;
}

/*
 * Reads the index file from INDEX_FP and closes it afterwards.
 *
 * The file is expected to hold NUM_CHROMOSOMES chromosome sections, each
 * laid out as:
 *   - chromosome number            (unsigned int, used as array index)
 *   - chromosome length            (unsigned int) -> CHR_LENGTH[chr]
 *   - chromosome description       (CHR_DESC_LENGTH chars) -> CHR_DESC[chr],
 *     stored in a newly allocated buffer
 *   - number of slots              (unsigned int)
 *   - the slot records; twice as many as the slot count when
 *     REV_IDX_EXISTS is 1. Each record is:
 *       slot (int), number of entries, then that many STORAGE_ENTRY items.
 *
 * Entries of a record with slot >= 0 are read to INDEX[slot].last_entry.
 * Entries of a record with slot < 0 belong to the reverse index
 * (-2147483647 denotes reverse slot 0, any other value reverse slot -slot);
 * they are read to INDEX_REV[..].last_entry when MAP_REVERSE is set and
 * skipped in the file otherwise. After reading, last_entry is advanced past
 * the entries just stored.
 *
 * The program exits with status 1 on a short read, a chromosome number
 * outside [0, NUM_CHROMOSOMES), a failed seek or a failed allocation.
 *
 * NOTE(review): slot values are not range-checked against the size of
 * INDEX / INDEX_REV, whose allocation is not visible here - confirm.
 *
 * Returns: always 0.
 */
int read_index() 
{
	unsigned int chr_num = 0;
	unsigned int chr;
	unsigned int chrlen;
	unsigned int chr_slot_num;
	unsigned int slot_records;
	int slot;
	unsigned int slot_entry_num;
	unsigned int i;
	char chr_desc[CHR_DESC_LENGTH];

	if (VERBOSE) { printf("Reading in index\n"); }

	while (chr_num != NUM_CHROMOSOMES) {

		chr_num++;

		//HEADER OF CHROMOSOME ENTRY

		//chromosome
		if (fread(&chr, sizeof(unsigned int), 1, INDEX_FP) != 1) {
			printf("Early stop in index file (1).\nCorrupted file?\n");
			exit(1);
		}
		// chr indexes CHR_LENGTH and CHR_DESC, which hold NUM_CHROMOSOMES
		// elements; never trust the value from the file blindly
		if (chr >= (unsigned int) NUM_CHROMOSOMES) {
			fprintf(stderr, "ERROR : chromosome number %u in index file is out of range.\nCorrupted file?\n", chr);
			exit(1);
		}
		if (VERBOSE) { printf("\tchromosome ID is %d, ", chr+1); }

		//chromosome length
		if (fread(&chrlen, sizeof(unsigned int), 1, INDEX_FP) != 1) {
			printf("Early stop in index file (2).\nCorrupted file?\n");
			exit(1);
		}
		if (DEBUG) { printf("length %d\t", chrlen); }
		CHR_LENGTH[chr] = chrlen;

		//chromosome description
		if (fread(chr_desc, sizeof(char), CHR_DESC_LENGTH, INDEX_FP) != CHR_DESC_LENGTH) {
			printf("Early stop in index file (3).\nCorrupted file?\n");
			exit(1);
		}
		// the bytes come straight from the file: force termination so that the
		// copy below cannot run past either buffer
		chr_desc[CHR_DESC_LENGTH - 1] = '\0';

		if ((*(CHR_DESC+chr) = (char*) malloc (CHR_DESC_LENGTH * sizeof(char))) == NULL) {
			fprintf(stderr, "ERROR : not enough memory for genome description\n");
			exit(1);
		}
		strcpy(CHR_DESC[chr], chr_desc);
		if (VERBOSE) { printf("description is %s\n", CHR_DESC[chr]); }

		//number of slots for this chromosome
		if (fread(&chr_slot_num, sizeof(unsigned int), 1, INDEX_FP) != 1) {
			printf("Early stop in index file (4).\nCorrupted file?\n");
			exit(1);
		}
		if (DEBUG) { printf("\tNb of slots for this chromosome is %d\n", chr_slot_num); }

		// with a reverse index every slot has a second (reverse) record
		slot_records = chr_slot_num * (1 + REV_IDX_EXISTS);

		for (i=0; i < slot_records; i++) {

			//HEADER OF SLOT ENTRY
			if (fread(&slot, sizeof(int), 1, INDEX_FP) != 1) {
				printf("Early stop in index file (4).\nCorrupted file?\n");
				exit(1);
			}

			if (fread(&slot_entry_num, sizeof(int), 1, INDEX_FP) != 1) {
				printf("Early stop in index file (5).\nCorrupted file?\n");
				exit(1);
			}

			if (slot >= 0) {

				if (fread((INDEX+slot)->last_entry, sizeof(STORAGE_ENTRY), slot_entry_num, INDEX_FP) != slot_entry_num) {
					printf("Early stop in index file (6).\nCorrupted file?\n");
					exit(1);
				}

				// move past the entries just stored
				(INDEX+slot)->last_entry += slot_entry_num;

			}
			else if (MAP_REVERSE) {

				slot = (slot == -2147483647)? 0: -slot;

				if (fread((INDEX_REV+slot)->last_entry, sizeof(STORAGE_ENTRY), slot_entry_num, INDEX_FP) != slot_entry_num) {
					printf("Early stop in index file (6r).\nCorrupted file?\n");
					exit(1);
				}

				// move past the entries just stored
				(INDEX_REV+slot)->last_entry += slot_entry_num;

			}
			else {

				// reverse entries are not wanted: skip them in the file
				if (fseek(INDEX_FP, (long) (sizeof(STORAGE_ENTRY) * slot_entry_num), SEEK_CUR) != 0) {
					fprintf(stderr, "ERROR : could not skip reverse entries in index file\n");
					exit(1);
				}

			}

		} // for every slot record of this chromosome

	} // for every chromosome

	fclose(INDEX_FP);

	if (VERBOSE) { printf("Finished parsing index\n"); }

	return(0);
}
