// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#include "genomemapper.h"

// Forward declarations of the index loading helpers defined further down in
// this file. In a METHYLOME build the meta index readers take a conversion
// selector: the definitions treat 1 as the CT index and any other value as
// the GA index.
#ifndef METHYLOME
int read_meta_index_header();
int read_meta_index();
#else
int read_meta_index_header(int conversion);
int read_meta_index(int conversion);
#endif
int read_chr_index();
// NOTE(review): declared with an empty parameter list, although the METHYLOME
// definition of mmap_indices takes an int conversion argument.
int mmap_indices();
// Wrapper around mmap(); the mapping address is stored through map.
int gm_mmap(size_t length, int prot, int flags, int fd, off_t offset, void *map);
// Opens path read-only and maps the whole file; address is stored in *map.
int mmap_full_file(const char *path, void **map);

/*
 * Load the genome index.
 *
 * Steps, in this order:
 *   1. read the meta index header(s),
 *   2. allocate the index memory,
 *   3. read the meta index entries,
 *   4. initialize from the meta information,
 *   5. memory map the index files,
 *   6. read the chromosome index.
 *
 * In a METHYLOME build steps 1, 2, 3 and 5 are each carried out for
 * conversion 1 and then for conversion 2 before the next step starts.
 *
 * Always returns 0; the return values of the individual steps are not
 * inspected here.
 */
int build_index()
{
#ifdef METHYLOME
	int conversion;

	// meta information, one pass per step over both conversions
	for (conversion = 1; conversion <= 2; conversion++)
		read_meta_index_header(conversion);

	for (conversion = 1; conversion <= 2; conversion++)
		alloc_index_memory(conversion);

	for (conversion = 1; conversion <= 2; conversion++)
		read_meta_index(conversion);

	// initialize with meta information
	init_from_meta_index();

	// mmap map files into memory
	for (conversion = 1; conversion <= 2; conversion++)
		mmap_indices(conversion);
#else
	// meta information
	read_meta_index_header();
	alloc_index_memory();
	read_meta_index();

	// initialize with meta information
	init_from_meta_index();

	// mmap map files into memory
	mmap_indices();
#endif

	// chromosome information
	read_chr_index();

	return 0;
}

/*
 * Read one fixed-size field of the meta index header from fp into dst.
 * Prints an error message and terminates the program if the field cannot
 * be read.
 */
static void read_meta_field(void *dst, size_t size, FILE *fp)
{
	if (fread(dst, size, 1, fp) != 1) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(1);
	}
}

/*
 * Parse the header of the meta index file.
 *
 * Fields are read in this order:
 *   - reverse index flag (one char)        -> REV_IDX_EXISTS
 *   - index depth (int)                    -> INDEX_DEPTH
 *   - number of chromosomes (int)          -> NUM_CHROMOSOMES
 *   - one int that is read and discarded
 *   - size of the longest chromosome (int) -> LONGEST_CHROMOSOME
 *   - number of blocks (unsigned int)
 *   - the block table (that many POS records)
 *
 * Side effects besides the globals listed above:
 *   - USR_READ_LENGTH is raised to INDEX_DEPTH if it is smaller,
 *   - HITLEN_LIMIT, EXTRA_SEED_EXTEND and SEED_EXTRA_EXTEND_THRESHOLD are
 *     adjusted relative to INDEX_DEPTH,
 *   - CHR_LENGTH and CHR_DESC are allocated with NUM_CHROMOSOMES elements,
 *   - the block table is allocated and stored in BLOCK_TABLE (or, in a
 *     METHYLOME build, BLOCK_TABLE_CT for conversion 1 and BLOCK_TABLE_GA
 *     otherwise).
 *
 * NOTE(review): in a METHYLOME build build_index() calls this function
 * twice, so CHR_LENGTH and CHR_DESC are allocated twice and the first
 * allocation is no longer referenced.
 *
 * Returns 0. Any read, validation or allocation failure terminates the
 * program with exit(1).
 */
#ifndef METHYLOME
int read_meta_index_header() {
	POS** BTP = &BLOCK_TABLE;
	
#else
int read_meta_index_header(int conversion) {

	FILE *META_INDEX_FP;
	POS** BTP;

	// conversion 1 selects the CT meta index, anything else the GA one
	if (conversion == 1) {
		META_INDEX_FP = META_INDEX_CT_FP;
		BTP = &BLOCK_TABLE_CT;
	}
	else {
		META_INDEX_FP = META_INDEX_GA_FP;
		BTP = &BLOCK_TABLE_GA;
	}
#endif

	int dummy;
	unsigned int blocks;

	if (VERBOSE) { printf("Reading in meta index\n"); }

	////////////////////////////////////////////////////////////////////////////////////
	// Get rev index flag
	read_meta_field(&REV_IDX_EXISTS, sizeof(char), META_INDEX_FP);

	if (!REV_IDX_EXISTS && MAP_REVERSE) { 
		fprintf(stderr, "\nERROR: Index file doesn't contain reverse index: mapping to reverse strand cannot be done.\n(Did you forget to set -r in genomemapper command line?)\n\n");
		exit(1);
	}

	////////////////////////////////////////////////////////////////////////////////////
	// Index depth
	read_meta_field(&INDEX_DEPTH, sizeof(int), META_INDEX_FP);
	if (VERBOSE) { printf("\tIndex depth is %d\n", INDEX_DEPTH); }

	if (USR_READ_LENGTH < INDEX_DEPTH) {
		fprintf(stderr, "WARNING: User defined read length is smaller than the index depth. It will be set on the index depth!\n");
		USR_READ_LENGTH = INDEX_DEPTH;
	}

	// a hit length limit of 0 means "not set": fall back to the index depth
	if (HITLEN_LIMIT == 0) {
		HITLEN_LIMIT = INDEX_DEPTH;
	}
	else if (HITLEN_LIMIT < INDEX_DEPTH) {
		fprintf(stderr, "\nERROR: Hitlength limit is smaller than seedlength.\n\n");
		exit(1);
	}

	if (SEED_EXTRA_EXTEND_THRESHOLD < DEFAULT_MAX_OCC_PER_SEED) {
		EXTRA_SEED_EXTEND = HITLEN_LIMIT - INDEX_DEPTH;
		// without room to extend, the threshold is reset to the default
		if (EXTRA_SEED_EXTEND == 0) SEED_EXTRA_EXTEND_THRESHOLD = DEFAULT_MAX_OCC_PER_SEED;
	}

	////////////////////////////////////////////////////////////////////////////////////
	// Get number of chromosomes
	read_meta_field(&NUM_CHROMOSOMES, sizeof(int), META_INDEX_FP);
	if (VERBOSE) { printf("\tNb of chromosomes is %d\n", NUM_CHROMOSOMES); }

	// the value comes from the file and is used as an allocation count below
	if (NUM_CHROMOSOMES < 1) {
		fprintf(stderr, "ERROR: invalid number of chromosomes in meta index file\n");
		exit(1);
	}

	// alloc space for chromosome sizes (one unsigned int per chromosome)
	if ((CHR_LENGTH = (unsigned int *) malloc ((size_t) NUM_CHROMOSOMES * sizeof(unsigned int))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome memory\n");
		exit(1);
	}
	// and descriptions (one char pointer per chromosome)
	if ((CHR_DESC = (char**) malloc ((size_t) NUM_CHROMOSOMES * sizeof(char*))) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for genome description\n");
		exit(1);
	}

	////////////////////////////////////////////////////////////////////////////////////
	// Get number of positions in index? (value is read and discarded)
	read_meta_field(&dummy, sizeof(int), META_INDEX_FP);

	////////////////////////////////////////////////////////////////////////////////////
	// Size of longest chromosome
	read_meta_field(&LONGEST_CHROMOSOME, sizeof(int), META_INDEX_FP);

	//////////////////////////////////////////////////////////////
	// read block table:	
	//////////////////////////////////////////////////////////////
	read_meta_field(&blocks, sizeof(unsigned int), META_INDEX_FP);

	// an empty block table is treated as a read failure
	if (blocks == 0) {
		fprintf(stderr, "ERROR: cant read meta index file (blocktable)\n");
		exit(1);
	}

	(*BTP) = (POS *) malloc ((size_t) blocks * sizeof(POS));
	if ((*BTP) == NULL) {
		fprintf(stderr, "ERROR : not enough memory for block table\n");
		exit(1);
	}

	// every announced block must be present, a short read means a truncated file
	if (fread((*BTP), sizeof(POS), blocks, META_INDEX_FP) != blocks) {
		fprintf(stderr, "ERROR: cant read meta index file (blocktable)\n");
		exit(1);
	}

	return(0);
}

/*
 * Read all remaining records of the meta index file and fill the forward
 * and reverse index tables.
 *
 * Each record is a META_INDEX_ENTRY with a slot and a number of positions:
 *   - slot >= 0: forward index entry, stored in INDEX[slot];
 *   - slot <  0: reverse index entry, only used when MAP_REVERSE is set and
 *     stored in INDEX_REV[-slot]; the value -2147483647 encodes reverse
 *     slot 0.
 *
 * For every stored entry, num is copied from the file and offset is set to
 * the running total of num over all entries of that direction read so far,
 * including the current one. MAX_POSITIONS is raised to the largest num
 * seen.
 *
 * In a METHYLOME build conversion 1 selects the CT file and tables, any
 * other value the GA ones.
 *
 * NOTE(review): slots come straight from the file and are used as array
 * indices without a bounds check; the table sizes are not visible here.
 * A trailing partial record ends the loop in the same way as end of file.
 *
 * The meta index file is closed before returning. Returns 0; a read error
 * on the stream terminates the program with exit(1).
 */
#ifndef METHYLOME
int read_meta_index() {
#else
int read_meta_index(int conversion) {
	
	FILE *META_INDEX_FP;
	INDEX_ENTRY *INDEX;
	INDEX_ENTRY *INDEX_REV;

	if (conversion == 1) {
		META_INDEX_FP = META_INDEX_CT_FP;
		INDEX = INDEX_CT;
		INDEX_REV = INDEX_REV_CT;
	}
	else {
		META_INDEX_FP = META_INDEX_GA_FP;
		INDEX = INDEX_GA;
		INDEX_REV = INDEX_REV_GA;
	}
#endif

	META_INDEX_ENTRY file_entry;
	unsigned int index_offset = 0, index_rev_offset = 0;

	while (fread(&file_entry, sizeof(META_INDEX_ENTRY), 1, META_INDEX_FP) == 1) {

		if (file_entry.slot >= 0) {

			// forward entry
			index_offset += file_entry.num;

			INDEX[file_entry.slot].num = file_entry.num;
			INDEX[file_entry.slot].offset = index_offset;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}
		else if (MAP_REVERSE) {

			// reverse entry: slot is stored negated, -2147483647 stands for slot 0
			int slot_rev = (file_entry.slot == -2147483647) ? 0 : -file_entry.slot;

			index_rev_offset += file_entry.num;

			INDEX_REV[slot_rev].num = file_entry.num;
			INDEX_REV[slot_rev].offset = index_rev_offset;

			if (file_entry.num > MAX_POSITIONS) MAX_POSITIONS = file_entry.num;

		}

	}

	// the loop ends on end of file and on read errors alike; tell them apart
	if (ferror(META_INDEX_FP)) {
		fprintf(stderr, "ERROR: cant read meta index file\n");
		exit(1);
	}

	fclose(META_INDEX_FP);

	return 0;
}

/*
 * Read the per-chromosome records from CHR_INDEX_FP.
 *
 * NUM_CHROMOSOMES records are expected. Each record consists of
 *   - the chromosome ID (unsigned int), used as index into CHR_LENGTH and
 *     CHR_DESC,
 *   - the chromosome length (unsigned int), stored in CHR_LENGTH[id],
 *   - a description of exactly CHR_DESC_LENGTH bytes, copied into a newly
 *     allocated buffer stored in CHR_DESC[id].
 *
 * The chromosome ID is validated against NUM_CHROMOSOMES before it is used
 * as an index, and the description is forcibly NUL-terminated before it is
 * copied, so a corrupt file cannot write outside the allocated arrays.
 *
 * CHR_INDEX_FP is closed before returning. Returns 0; a short read, an
 * invalid chromosome ID or an allocation failure terminates the program
 * with exit(1).
 */
int read_chr_index() 
{
	unsigned int chr_num;
	unsigned int chr;
	unsigned int chrlen; 
	char chr_desc[CHR_DESC_LENGTH];
	
	if (VERBOSE) { printf("Reading in index\n"); }

	for (chr_num = 0; chr_num != (unsigned int) NUM_CHROMOSOMES; chr_num++) {

		//HEADER OF CHROMOSOME ENTRY
		
		//chromosome
		if (fread(&chr, sizeof(unsigned int), 1, CHR_INDEX_FP) != 1) {
			fprintf(stderr,"Early stop in index file (1).\nCorrupted file?\n");
			exit(1);
		}
		// chr indexes CHR_LENGTH and CHR_DESC, which hold NUM_CHROMOSOMES elements
		if (chr >= (unsigned int) NUM_CHROMOSOMES) {
			fprintf(stderr,"Invalid chromosome ID in index file.\nCorrupted file?\n");
			exit(1);
		}
		if (VERBOSE) { printf("\tchromosome ID is %d, ", (int) (chr + 1)); }
		
		//chromosome length
		if (fread(&chrlen, sizeof(unsigned int), 1, CHR_INDEX_FP) != 1) {
			fprintf(stderr,"Early stop in index file (2).\nCorrupted file?\n");
			exit(1);
		}
		CHR_LENGTH[chr] = chrlen;
			
		//chromosome description
		if (fread(chr_desc, sizeof(char), CHR_DESC_LENGTH, CHR_INDEX_FP) != CHR_DESC_LENGTH) {
			fprintf(stderr,"Early stop in index file (3).\nCorrupted file?\n");
			exit(1);
		}
		// the file is not trusted to contain a terminator within the field
		chr_desc[CHR_DESC_LENGTH - 1] = '\0';

		if ((CHR_DESC[chr] = (char*) malloc (CHR_DESC_LENGTH * sizeof(char))) == NULL) {
			fprintf(stderr, "ERROR : not enough memory for genome description\n");
			exit(1);
		}
		strcpy(CHR_DESC[chr], chr_desc);
		if (VERBOSE) { printf("description is %s\n", CHR_DESC[chr]); }
	
	} // for every chromosome

	fclose(CHR_INDEX_FP);

	if (VERBOSE) { printf("Finished parsing index\n"); }
	
  	return(0);
}


////////////////////////////////////////////////////////
// Code originally written by Andre Noll.
// Code adapted from Paraslash.
////////////////////////////////////////////////////////

/*
 * Memory map the forward index file and, if MAP_REVERSE is 1, the reverse
 * index file, and publish the mapping addresses in the global pointers.
 *
 * Without METHYLOME the files INDEX_FWD_FILE_NAME / INDEX_REV_FILE_NAME are
 * mapped and the results are stored in INDEX_FWD_MMAP / INDEX_REV_MMAP.
 * With METHYLOME, conversion 1 selects the CT file names and pointers, any
 * other value the GA ones.
 *
 * When the reverse index is not mapped, the reverse pointer is set to NULL.
 *
 * Returns 0. A failure reported by mmap_full_file terminates the program
 * with exit(1).
 */
#ifndef METHYLOME
int mmap_indices() {
#else
int mmap_indices(int conversion) {
	// The names are only read, so pointing at the selected global is enough
	// and avoids copying into a fixed-size buffer.
	const char *INDEX_FWD_FILE_NAME;
	const char *INDEX_REV_FILE_NAME;
	if (conversion == 1) {
		INDEX_FWD_FILE_NAME = INDEX_FWD_CT_FILE_NAME;
		INDEX_REV_FILE_NAME = INDEX_REV_CT_FILE_NAME;
	}
	else {
		INDEX_FWD_FILE_NAME = INDEX_FWD_GA_FILE_NAME;
		INDEX_REV_FILE_NAME = INDEX_REV_GA_FILE_NAME;
	}
#endif

	void *fwd = NULL;
	// stays NULL when no reverse index is mapped; it is stored unconditionally below
	void *rev = NULL;

	// Map fwd index
	if (mmap_full_file(INDEX_FWD_FILE_NAME, &fwd) != 0) {
		perror("ERROR: Could not get file status\n");
		exit(1);
	} 
	// Map rev index
	if (MAP_REVERSE == 1) {	
		if (mmap_full_file(INDEX_REV_FILE_NAME, &rev) != 0) {
			perror("ERROR: Could not get file status\n");
			exit(1);
		}
	}

#ifndef METHYLOME
	INDEX_FWD_MMAP = fwd;
	INDEX_REV_MMAP = rev;
#else
	if (conversion == 1) {
		INDEX_FWD_CT_MMAP = fwd;
		INDEX_REV_CT_MMAP = rev;
	}
	else {
		INDEX_FWD_GA_MMAP = fwd;
		INDEX_REV_GA_MMAP = rev;
	}
#endif

	return 0;
}

/*
 * Map a complete file read-only into memory.
 *
 * path: file to map.
 * map:  receives the address of the mapping.
 *
 * The file is opened with O_RDONLY, its size is taken from fstat() and the
 * whole file is mapped with PROT_READ and MAP_SHARED|MAP_NORESERVE through
 * gm_mmap(). The descriptor is closed again once the mapping exists; the
 * mapping itself stays valid after the close.
 *
 * Returns the result of gm_mmap(). If the file cannot be opened or its
 * status cannot be read, an error is printed and the program terminates
 * with exit(1).
 */
int mmap_full_file(const char *path, void **map) {

	struct stat file_status;
	int fd, ret;

	// Open file to get file size
	fd = open(path, O_RDONLY, 0);
	if (fd < 0) {
		perror("ERROR: Could not open file\n");
		exit(1);
	}
	if (fstat(fd, &file_status) < 0) {
		perror("ERROR: Could not get file status\n");
		exit(1);
	}

	// Map file
	ret = gm_mmap((size_t) file_status.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE, fd, 0, map);

	// The descriptor is only needed to establish the mapping; keeping it
	// open would leak one descriptor per mapped file.
	close(fd);

	return ret;
	
}

/*
 * Wrapper around mmap() that lets the kernel choose the address.
 *
 * length, prot, flags, fd, offset: passed to mmap() unchanged.
 * map: treated as a pointer to a void pointer; receives the address
 *      returned by mmap().
 *
 * Returns 0 on success. If mmap() fails, an error is printed with perror()
 * and the program terminates with exit(1).
 */
int gm_mmap(size_t length, int prot, int flags, int fd, off_t offset, void *map) {

	void *addr = mmap(NULL, length, prot, flags, fd, offset);

	// hand the result to the caller before looking at it
	*(void **) map = addr;

	if (addr != MAP_FAILED)
		return 0;

	perror("ERROR: Could not memory map file\n");
	exit(1);
}





