// Authors: Korbinian Schneeberger and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#include "gmindex.h"

/*
 * Forward declarations of the helpers defined further down in this file.
 * In METHYLOME builds pos2bin()/pos2bin_rev() take an extra `conversion`
 * argument (1 selects the *_CT index arrays, any other value the *_GA ones).
 */
#ifdef METHYLOME
int pos2bin(unsigned int slot, int conversion); 
int pos2bin_rev(unsigned int slot, int conversion); 
#else
int pos2bin(unsigned int slot); 
int pos2bin_rev(unsigned int slot);
#endif
/* Computes the index slot of the INDEX_DEPTH characters starting at seq[pos]. */
int get_slot(char *seq, int pos);
#ifdef METHYLOME
/* Recursively enumerates the C->T / G->A converted variants of a seed and indexes each one. */
int iterate_slots(char *seq, int pos, char part[MAX_INDEX_DEPTH], int currlength, int conversion);
#endif

/*
 * Adds every seed of the currently loaded sequence (CHR_SEQ, CHR_LENGTH)
 * to the index.
 *
 * The sequence is scanned with a window [pos, spacer].  While the window is
 * shorter than INDEX_DEPTH only `spacer` advances.  Once it spans
 * INDEX_DEPTH characters that are all A/C/G/T, the seed starting at `pos`
 * is stored (pos2bin()/pos2bin_rev(), or iterate_slots() in METHYLOME
 * builds) and the window slides by one.  Any other character restarts the
 * window right behind that character.
 *
 * POSITION is advanced together with `pos`; whenever it reaches BLOCK_SIZE
 * a new BLOCK_TABLE entry is opened that records `chr` and the sequence
 * offset at which POSITION would be 0.
 *
 * chr: identifier written into the BLOCK_TABLE entries created here.
 *
 * Returns 0.  Terminates the process with a non-zero status when the
 * block table is exhausted.
 */
int index_chromosome(unsigned int chr) 
{
	unsigned int pos = 0;	/* first character of the current seed window */
	int spacer = 0;		/* character currently being inspected (window end) */
#ifndef METHYLOME
	int slot = 0;
#endif
	POS p;

	if (VERBOSE) { printf("\tBuilding index ..."); fflush(stdout); }

	HAS_SLOT = 0;

	while (spacer < CHR_LENGTH) {
		const int base = CHR_SEQ[spacer];
		const int is_nucleotide = (base == 'A' || base == 'T' || base == 'C' || base == 'G');

		if (!is_nucleotide) {
			/* Skip past the character and restart the window behind it;
			 * the previous slot can no longer be updated incrementally. */
			spacer++;
			POSITION += spacer - pos;
			pos = spacer;
			HAS_SLOT = 0;
		}
		else if (spacer < pos + INDEX_DEPTH - 1) {
			/* Window still too short: just extend it. */
			spacer++;
		}
		else {
#ifndef METHYLOME
			slot = get_slot(CHR_SEQ, pos); // yields also SLOT_REV if -r option wasn't set

			if (INDEX[slot] == NULL) {
				alloc_bin(slot);
				if (BUILD_REVERSE_INDEX) alloc_bin_rev(SLOT_REV);
			}

			pos2bin(slot);	// 0-initialized
			if (BUILD_REVERSE_INDEX) {
				pos2bin_rev(SLOT_REV); // 0-initialized
			}

			/* get_slot() may derive the next slot from this one. */
			HAS_SLOT = 1;
#else
			char part[MAX_INDEX_DEPTH];
			part[0] = 0;
			iterate_slots(CHR_SEQ, pos, part, 0, 0);
#endif

			spacer++;
			pos++;
			POSITION++;
		}

		if (POSITION >= BLOCK_SIZE) {
			if (BLOCK == BLOCK_TABLE_SIZE - 1) {
				fprintf(stderr, "ERROR: Too large chrs/contigs or too many chrs/contigs! Split input file into many smaller ones!\n");
				/* Fatal error: report failure to the caller (was exit(0)). */
				exit(1);
			}
			BLOCK++;
			POSITION %= BLOCK_SIZE;

			/* Block start is chosen so that block start + POSITION == pos. */
			p.chr = chr;
			p.pos = pos - POSITION;
			BLOCK_TABLE[BLOCK] = p;
		}
	}

	if (VERBOSE) printf("... done\n");

	return 0;
}

/*
 * Appends the current (BLOCK, POSITION) pair to the forward bin of `slot`.
 *
 * slot:       index slot whose bin receives the entry; the bin must already
 *             be allocated.
 * conversion: (METHYLOME builds only) 1 selects INDEX_CT / INDEX_REV_CT,
 *             any other value INDEX_GA; the reverse bin INDEX_REV_GA is
 *             consulted only for conversion == 2.
 *
 * An entry is 4 bytes: the first 3 bytes of BLOCK followed by the first
 * byte of POSITION (as laid out in memory).  The first BIN_SIZE entries
 * live in the bin itself, later ones in a chain of extension bins holding
 * BIN_SIZE_EXT entries each; bin->last_bin_ext tracks the chain tail.
 *
 * The slot is appended to USED_SLOTS when neither the forward nor (with
 * BUILD_REVERSE_INDEX) the reverse bin of `slot` held an entry before;
 * SLOT_COUNTER counts forward bins that receive their first entry.
 *
 * Always returns 0.
 */
#ifdef METHYLOME
int pos2bin(unsigned int slot, int conversion) 
#else
int pos2bin(unsigned int slot) 
#endif
{
	BIN *bin;
	BIN *binrev = NULL;
	BIN_EXT *ext;
	unsigned int num;
	unsigned int numrev = 0;
	unsigned int ext_idx;

#ifndef METHYLOME
	bin = INDEX[slot];
	if (BUILD_REVERSE_INDEX) {
		binrev = INDEX_REV[slot];
	}
#else
	if (conversion == 1) {
		bin = INDEX_CT[slot];
	}
	else {
		bin = INDEX_GA[slot];
	}
	if (BUILD_REVERSE_INDEX) {
		if (conversion == 1) {
			binrev = INDEX_REV_CT[slot];
		}
		else if (conversion == 2) {
			binrev = INDEX_REV_GA[slot];
		}
	}
#endif
	num = bin->num_pos;
	if (binrev != NULL) {
		numrev = binrev->num_pos;
	}

	if (num == 0 && numrev == 0) {
		USED_SLOTS[NUM_USED_SLOTS] = slot;
		NUM_USED_SLOTS++;
	}
	if (num == 0) {
		SLOT_COUNTER++;
	}

	/* Entry still fits into the bin itself. */
	if (num < BIN_SIZE) {
		memcpy(&(bin->ids[num].id[0]), &BLOCK, 3 * sizeof(char));
		memcpy(&(bin->ids[num].id[3]), &POSITION, sizeof(unsigned char));
		bin->num_pos++;
		return(0);
	}

	/* Offset of the new entry inside its extension bin. */
	ext_idx = (num - BIN_SIZE) % BIN_SIZE_EXT;

	if (bin->bin_ext == 0) {
		/* First overflow: start the extension chain. */
		ext = alloc_bin_ext();
		bin->bin_ext = ext;
		bin->last_bin_ext = ext;
		ext_idx = 0;
	}
	else if (ext_idx != 0) {
		/* Tail extension still has room.  (The former test
		 * `num % BIN_SIZE_EXT != BIN_SIZE` was only equivalent to this
		 * while BIN_SIZE < BIN_SIZE_EXT.) */
		ext = bin->last_bin_ext;
	}
	else {
		/* Tail extension is full: chain a fresh one behind it. */
		ext = alloc_bin_ext();
		bin->last_bin_ext->bin_ext = ext;
		bin->last_bin_ext = ext;
	}

	memcpy(&(ext->ids[ext_idx].id[0]), &BLOCK, 3 * sizeof(char));
	memcpy(&(ext->ids[ext_idx].id[3]), &POSITION, sizeof(unsigned char));
	bin->num_pos++;

	return(0);
}

/*
 * Appends the current (BLOCK, POSITION) pair to the reverse bin of `slot`.
 *
 * slot:       index slot whose reverse bin receives the entry; the bin must
 *             already be allocated.
 * conversion: (METHYLOME builds only) 1 selects INDEX_REV_CT / INDEX_CT,
 *             any other value INDEX_REV_GA; the forward bin INDEX_GA is
 *             consulted only for conversion == 2.
 *
 * Storage layout matches pos2bin(): 4-byte entries (first 3 bytes of
 * BLOCK, first byte of POSITION), BIN_SIZE entries in the bin itself and
 * the rest in a chain of extension bins of BIN_SIZE_EXT entries each.
 *
 * The slot is appended to USED_SLOTS when neither the reverse nor the
 * forward bin of `slot` held an entry before.  Unlike pos2bin() this
 * function does not touch SLOT_COUNTER.
 *
 * Always returns 0.
 */
#ifdef METHYLOME
int pos2bin_rev(unsigned int slot, int conversion)
#else
int pos2bin_rev(unsigned int slot)
#endif
{
	BIN *bin;
	BIN *binfwd = NULL;
	BIN_EXT *ext;
	unsigned int num;
	unsigned int numfwd = 0;
	unsigned int ext_idx;

#ifndef METHYLOME
	bin = INDEX_REV[slot];
	binfwd = INDEX[slot];
#else
	if (conversion == 1) {
		bin = INDEX_REV_CT[slot];
		binfwd = INDEX_CT[slot];
	}
	else {
		bin = INDEX_REV_GA[slot];
		if (conversion == 2) {
			binfwd = INDEX_GA[slot];
		}
	}
#endif
	num = bin->num_pos;
	if (binfwd != NULL) {
		numfwd = binfwd->num_pos;
	}

	if (num == 0 && numfwd == 0) {
		USED_SLOTS[NUM_USED_SLOTS] = slot;
		NUM_USED_SLOTS++;
	}

	/* Entry still fits into the bin itself. */
	if (num < BIN_SIZE) {
		memcpy(&(bin->ids[num].id[0]), &BLOCK, 3 * sizeof(char));
		memcpy(&(bin->ids[num].id[3]), &POSITION, sizeof(unsigned char));
		bin->num_pos++;
		return(0);
	}

	/* Offset of the new entry inside its extension bin. */
	ext_idx = (num - BIN_SIZE) % BIN_SIZE_EXT;

	if (bin->bin_ext == 0) {
		/* First overflow: start the extension chain. */
		ext = alloc_bin_ext();
		bin->bin_ext = ext;
		bin->last_bin_ext = ext;
		ext_idx = 0;
	}
	else if (ext_idx != 0) {
		/* Tail extension still has room.  (The former test
		 * `num % BIN_SIZE_EXT != BIN_SIZE` was only equivalent to this
		 * while BIN_SIZE < BIN_SIZE_EXT.) */
		ext = bin->last_bin_ext;
	}
	else {
		/* Tail extension is full: chain a fresh one behind it. */
		ext = alloc_bin_ext();
		bin->last_bin_ext->bin_ext = ext;
		bin->last_bin_ext = ext;
	}

	memcpy(&(ext->ids[ext_idx].id[0]), &BLOCK, 3 * sizeof(char));
	memcpy(&(ext->ids[ext_idx].id[3]), &POSITION, sizeof(unsigned char));
	bin->num_pos++;

	return(0);
}

#ifdef METHYLOME
/*
 * Recursively builds, in `part`, every converted variant of the seed
 * seq[pos .. pos+INDEX_DEPTH-1] and stores each completed variant in the
 * index.
 *
 * seq, pos:   source sequence and start offset of the seed.
 * part:       scratch buffer receiving the variant built so far.
 * currlength: number of characters already written to `part`.
 * conversion: 0 = no converted base chosen yet, 1 = at least one C was
 *             written as T, 2 = at least one G was written as A.
 *
 * A 'C' is always emitted unchanged and, unless the branch is already a
 * G->A branch, additionally as 'T' (switching to conversion 1).  A 'G' is
 * handled symmetrically with 'A' and conversion 2.  Completed variants go
 * to the CT index for conversion 0 or 1 and to the GA index for
 * conversion 0 or 2.
 *
 * Always returns 0.
 */
int iterate_slots(char *seq, int pos, char part[MAX_INDEX_DEPTH], int currlength, int conversion) {

	if (currlength < INDEX_DEPTH) {
		/* Seed not complete yet: emit the next character and recurse. */
		const char base = seq[pos+currlength];
		const int next = currlength + 1;

		switch (base) {
		case 'C':
			part[currlength] = 'C';
			iterate_slots(seq, pos, part, next, conversion);
			if (conversion == 0 || conversion == 1) {
				part[currlength] = 'T';
				iterate_slots(seq, pos, part, next, 1);
			}
			break;

		case 'G':
			part[currlength] = 'G';
			iterate_slots(seq, pos, part, next, conversion);
			if (conversion == 0 || conversion == 2) {
				part[currlength] = 'A';
				iterate_slots(seq, pos, part, next, 2);
			}
			break;

		default:
			part[currlength] = base;
			iterate_slots(seq, pos, part, next, conversion);
			break;
		}

		return(0);
	}

	/* Variant complete: compute its slot (get_slot also updates SLOT_REV
	 * when the reverse index is built) and store it. */
	{
		const int slot = get_slot(&part[0], 0);

		if (conversion == 1 || conversion == 0) {
			if (INDEX_CT[slot] == NULL) {
				alloc_bin(slot, 1);
				if (BUILD_REVERSE_INDEX) {
					alloc_bin_rev(SLOT_REV, 1);
				}
			}

			pos2bin(slot, 1);
			if (BUILD_REVERSE_INDEX) {
				pos2bin_rev(SLOT_REV, 1);
			}
		}

		if (conversion == 2 || conversion == 0) {
			if (INDEX_GA[slot] == NULL) {
				alloc_bin(slot, 2);
				if (BUILD_REVERSE_INDEX) {
					alloc_bin_rev(SLOT_REV, 2);
				}
			}

			pos2bin(slot, 2);
			if (BUILD_REVERSE_INDEX) {
				pos2bin_rev(SLOT_REV, 2);
			}
		}
	}

	/* Consecutive variants are unrelated, so the slot must always be
	 * recomputed from scratch. */
	HAS_SLOT = 0;

	return(0);
}
#endif

/*
 * Returns the index slot of the INDEX_DEPTH characters starting at
 * seq[pos] and remembers it in SLOT.  With BUILD_REVERSE_INDEX set,
 * SLOT_REV is updated alongside (built from the complemented codes in
 * reverse order).
 *
 * Character codes: A=0, C=1, G=2, T=3; character i contributes
 * POWER[i] * code to the slot.
 *
 * HAS_SLOT == 0: the slot is computed from all INDEX_DEPTH characters; a
 *                character other than A/C/G/T aborts the program.
 * HAS_SLOT != 0: the previous SLOT is updated for a window moved by one
 *                character: the oldest code is shifted out and the code
 *                of the newest character is OR-ed in via BINARY_CODE[].
 *                NOTE(review): the shift pair on SLOT_REV looks like it
 *                assumes a 32-bit SLOT_REV - confirm.  Any character that
 *                is not A/C/G is treated as T on this path.
 */
int get_slot(char *seq, int pos)
{
	unsigned int slot = 0;
	unsigned int i;
	int c = 0;

	if (HAS_SLOT != 0) {
		/* Incremental update from the previous window. */
		const char incoming = seq[pos+INDEX_DEPTH-1];

		slot = SLOT;
		slot >>= 2;

		if (BUILD_REVERSE_INDEX) {
			SLOT_REV <<= 34 - INDEX_DEPTH * 2;
			SLOT_REV >>= 32 - INDEX_DEPTH * 2;
		}

		switch (incoming) {
		case 'A':
			slot = slot | BINARY_CODE[0];
			if (BUILD_REVERSE_INDEX) SLOT_REV |= 3;
			break;
		case 'C':
			slot = slot | BINARY_CODE[1];
			if (BUILD_REVERSE_INDEX) SLOT_REV |= 2;
			break;
		case 'G':
			slot = slot | BINARY_CODE[2];
			if (BUILD_REVERSE_INDEX) SLOT_REV |= 1;
			break;
		default: /* 'T': its complement code is 0, SLOT_REV needs no bits */
			slot = slot | BINARY_CODE[3];
			break;
		}

		SLOT = slot;
		return(slot);
	}

	/* Full computation over the whole window. */
	SLOT_REV = 0;
	for (i=0; i<INDEX_DEPTH; i++) {
		const char ch = seq[pos+i];

		switch (ch) {
		case 'A': c = 0; break;
		case 'C': c = 1; break;
		case 'G': c = 2; break;
		case 'T': c = 3; break;
		default:
			fprintf(stderr, "ERROR: Reached unallowed character in calculateSlot %c at position %d \n", ch, (pos+i));
			exit(1);
		}

		slot = slot + POWER[i] * c;
		if (BUILD_REVERSE_INDEX) SLOT_REV += POWER[INDEX_DEPTH - i - 1] * (c ^ 3);
	}

	SLOT = slot;

	return(slot);
}

