#include "prepstrain.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void usage();

int main(int argc, char *argv[])
{
	int i;
	
	// INPUT
	char has_gen = 0, has_snp = 0, has_ins = 0, has_del = 0, has_out = 0;
	for (i = 1; i < argc; i++) {
		
		//Genome file
		if(strcmp(argv[i],"-g")==0){
			if(i+1 > argc - 1){ usage(); exit(1); }
			i++;
			strcpy(GEN_FILE_NAME, argv[i]);
			has_gen = 1;
		}

		//SNP file
		if(strcmp(argv[i],"-s")==0){
			if(i+1 > argc - 1){ usage(); exit(1); }
			i++;
			strcpy(SNP_FILE_NAME, argv[i]);
			has_snp = 1;
		}
		
		//Insertion file
		if(strcmp(argv[i],"-i")==0){
			if(i+1 > argc - 1){ usage(); exit(1); }
			i++;
			strcpy(INS_FILE_NAME, argv[i]);
			has_ins = 1;		
		}
		
		//Deletion file
		if(strcmp(argv[i],"-d")==0){
			if(i+1 > argc - 1){ usage(); exit(1); }
			i++;
			strcpy(DEL_FILE_NAME, argv[i]);
			has_del = 1;		
		}
		
		//Out file
		if(strcmp(argv[i],"-o")==0){
			if(i+1 > argc - 1){ usage(); exit(1); }
			i++;
			strcpy(OUT_FILE_NAME, argv[i]);
			has_out = 1;
		}

	}
	
	if (has_gen + has_snp + has_ins + has_del != 4) {
		usage();
		exit(1);
	}
	
	
	// FILE OPENING
		
	if (!has_out) OUT_FP = stdout;
	else {
		 if ((OUT_FP = fopen(OUT_FILE_NAME, "w")) == NULL) {
			fprintf(stderr, "ERROR : Couldn't open out file %s\n", OUT_FILE_NAME);
			exit(1);
		}
	}
	if ((GEN_FP = fopen(GEN_FILE_NAME, "r")) == NULL) {
		fprintf(stderr, "ERROR : Couldn't open sequence file %s\n", GEN_FILE_NAME);
		exit(1);
	}
	if ((SNP_FP = fopen(SNP_FILE_NAME, "r")) == NULL) {
		fprintf(stderr, "ERROR : Couldn't open SNP file %s\n", SNP_FILE_NAME);
		exit(1);
	}
	if ((INS_FP = fopen(INS_FILE_NAME, "r")) == NULL) {
		fprintf(stderr, "ERROR : Couldn't open Insertion file %s\n", INS_FILE_NAME);
		exit(1);
	}
	if ((DEL_FP = fopen(DEL_FILE_NAME, "r")) == NULL) {
		fprintf(stderr, "ERROR : Couldn't open Deletion file %s\n", DEL_FILE_NAME);
		exit(1);
	}
	
	
	// read in chromosome list
	printf("read in sequence\n");
	char line[1000];
	char **chroms = (char **) malloc(1000000 * sizeof(char *));
	for (i=0; i!=1000000; ++i) chroms[i] = (char *) malloc(500 * sizeof(char));
	
	i = 0;
	while (fgets(line, 1000, GEN_FP) != 0) {
		line[strlen(line)-1] = '\0';
		if (line[0] == '>') {
			strcpy(chroms[i], line+1);
			++i;
		}
	}
	int num_chroms = i;
	
	if (i==0) {
		fprintf(stderr, "ERROR: Sequence file not accessible or empty!\n");
		exit(0);
	}
	
	
	// START
	printf("start reading snindels\n");
	SNINDEL snindels[3];
	char eof[3], eoc[3];
	for (i=0; i!=3; ++i) {
		eof[i] = 0;
		eoc[i] = 0;
	}
	int idx, chr_idx = 0, chr_idx_min = 0;
	unsigned int pos = 400000000;
	
	// initialize snindels
	if (fgets(line, 1000, SNP_FP) == 0) {
		fprintf(stderr, "ERROR: SNP file empty!\n");
	}
	snindels[0].type = 'S';
	strcpy(snindels[0].strain_name, strtok(&(line[0]), "\t"));
	strcpy(snindels[0].chr, strtok('\0', "\t"));
	snindels[0].pos = atoi(strtok('\0', "\t"));
	strtok('\0', "\t");
	strcpy(snindels[0].seq, strtok('\0', "\t"));
	snindels[0].len = 1;
	
	while (strcmp((chroms+chr_idx)[0], snindels[0].chr) != 0) {
		++chr_idx;
	}
	chr_idx_min = chr_idx;
	
	if (fgets(line, 1000, INS_FP) == 0) {
		fprintf(stderr, "ERROR: Insertion file empty!\n");
	}
	snindels[1].type = 'I';
	strcpy(snindels[1].strain_name, strtok(&(line[0]), "\t"));
	strcpy(snindels[1].chr, strtok('\0', "\t"));
	snindels[1].pos = atoi(strtok('\0', "\t"));
	strtok('\0', "\t");
	snindels[1].len = atoi(strtok('\0', "\t"));
	strcpy(snindels[1].seq, strtok('\0', "\t"));
	
	chr_idx = 0;	
	while (strcmp((chroms+chr_idx)[0], snindels[1].chr) != 0) {
		++chr_idx;
	}
	chr_idx_min = (chr_idx < chr_idx_min)? chr_idx: chr_idx_min;
	
	if (fgets(line, 1000, DEL_FP) == 0) {
		fprintf(stderr, "ERROR: Deletion file empty!\n");
	}
	snindels[2].type = 'D';
	strcpy(snindels[2].strain_name, strtok(&(line[0]), "\t"));
	strcpy(snindels[2].chr, strtok('\0', "\t"));
	snindels[2].pos = atoi(strtok('\0', "\t"));
	strtok('\0', "\t");
	snindels[2].len = atoi(strtok('\0', "\t"));
	strcpy(snindels[2].seq, strtok('\0', "\t"));

	chr_idx = 0;	
	while (strcmp((chroms+chr_idx)[0], snindels[2].chr) != 0) {
		++chr_idx;
	}
	chr_idx = (chr_idx < chr_idx_min)? chr_idx: chr_idx_min;
	
	printf("chr_index: %d\n",chr_idx);
	
	while (eof[0]+eof[1]+eof[2] < 3) {
		//for (i=0; i!=3; ++i) printf("new iteration: min_chr: %s  snindel[%d]: type %c chr %s pos %d len %d seq %s/ eof %d eoc %d\n", chroms[chr_idx], i, snindels[i].type, snindels[i].chr, snindels[i].pos, snindels[i].len, snindels[i].seq, eof[i], eoc[i]);
		
		idx = -1;
		while (idx == -1) {
		
		//for (i=0; i!=3; ++i) printf("new iteration: min_chr: %s  snindel[%d]: type %c chr %s pos %d len %d seq %s/ eof %d eoc %d\n", chroms[chr_idx], i, snindels[i].type, snindels[i].chr, snindels[i].pos, snindels[i].len, snindels[i].seq, eof[i], eoc[i]);
		
			// determine next pos
			pos = 400000000;
			for (i = 0; i != 3; ++i) {

			//fprintf(OUT_FP, "snindel[%d]: type %c chr %s pos %d len %d seq %s/ eof %d eoc %d\n", i, snindels[i].type, snindels[i].chr, snindels[i].pos, snindels[i].len, snindels[i].seq, eof[i], eoc[i]);
								
				while (eoc[0]+eoc[1]+eoc[2] == 3) {
					++chr_idx;
					
					if (chr_idx >= num_chroms) {
						fprintf(stderr, "ERROR: unknown chromosome description in input files! last chr_idx: %s\n", chroms[chr_idx-1]);
						exit(0);
					}
					
					if (strcmp(snindels[0].chr, chroms[chr_idx]) == 0) eoc[0] = 0;
					if (strcmp(snindels[1].chr, chroms[chr_idx]) == 0) eoc[1] = 0;
					if (strcmp(snindels[2].chr, chroms[chr_idx]) == 0) eoc[2] = 0;
					
				}
				
				if (eoc[i]==1 || eof[i]==1) continue;
				
				if (strcmp(snindels[i].chr, chroms[chr_idx]) != 0) {
					eoc[i] = 1;
					continue;
				}
				//printf("min_chr: %s  snindel[%d]: type %c chr %s pos %d / eof %d eoc %d\n", chroms[chr_idx], i, snindels[i].type, snindels[i].chr, snindels[i].pos, eof[i], eoc[i]);
				
				
				if (snindels[i].pos < pos) {	// insertion before deletion, otherwise '<='
					pos = snindels[i].pos;
					idx = i;
					//printf("should be printed: min_chr: %s  snindel[%d]: type %c chr %s pos %d / eof %d eoc %d\n", chroms[chr_idx], i, snindels[i].type, snindels[i].chr, snindels[i].pos, eof[i], eoc[i]);
				}
			}
			
		}
				
		//printf("idx: %d\n",idx);
		
		// print out
		fprintf(OUT_FP, "%s\t%c\t%s\t%d\t%d\t%s\n", snindels[idx].strain_name, snindels[idx].type, snindels[idx].chr, snindels[idx].pos, snindels[idx].len, snindels[idx].seq);
		
		// get next snindel
		switch (idx) {
			case 0:	if (fgets(line, 1000, SNP_FP) == 0) {
						eof[0] = 1;
						eoc[0] = 1;
					}
					break;
			case 1:	if (fgets(line, 1000, INS_FP) == 0) {
						eof[1] = 1;
						eoc[1] = 1;
					}
					break;
			case 2:	if (fgets(line, 1000, DEL_FP) == 0) {
						eof[2] = 1;
						eoc[2] = 1;
					}
					break;
		}
		
		if (!eof[idx]) {
			strcpy(snindels[idx].strain_name, strtok(&(line[0]), "\t"));
			strcpy(snindels[idx].chr, strtok('\0', "\t"));
			if (strcmp(snindels[idx].chr, chroms[chr_idx]) != 0) eoc[idx] = 1;
			else eoc[idx] = 0;
			snindels[idx].pos = atoi(strtok('\0', "\t"));
			strtok('\0', "\t");
			if (idx == 0) {
				snindels[idx].len = 1;
				strcpy(snindels[idx].seq, strtok('\0', "\t"));
			}
			else {
				snindels[idx].len = atoi(strtok('\0', "\t"));
				strcpy(snindels[idx].seq, strtok('\0', "\t"));
			}
		}
		
	}
	
	
	fclose(SNP_FP);
	fclose(INS_FP);
	fclose(DEL_FP);
	fclose(OUT_FP);
	
	printf("done\n");
	
	
	return 1;
}


/*
 * Prints the program banner (with VERSION) followed by the command-line
 * help text to stdout.
 */
void usage()
{
	// everything after the version banner, in output order
	static const char *const help_text[] = {
		"  (merges SHORE ConsensusAnalysis output files to serve as input file for GenomeMapper)\n",
		"written by Korbinian Schneeberger, Stephan Ossowski and Joerg Hagmann\n",
		"Max Planck Institute for Developmental Biology, Tübingen, Germany, 2008\n\n",
		"USAGE: prepstrain [options]\n",
		"\n",
		"mandatory options:\n",
		" -g STRING  sequence file\n",
		" -s STRING  SNPs file\n",
		" -i STRING  insertions file\n",
		" -d STRING  deletions file\n",
		"\noptional:\n",
		" -o STRING  output file (default: stdout)\n",
		"\n",
	};
	const size_t chunk_count = sizeof help_text / sizeof help_text[0];
	size_t k;

	printf("\nPrepStrain v%sbeta\n", VERSION);
	for (k = 0; k < chunk_count; ++k) {
		fputs(help_text[k], stdout);
	}
}

