// Authors: Korbinian Schneeberger, Stephan Ossowski and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany

#ifndef GENOMEMAPPER_H_
#define GENOMEMAPPER_H_

#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Program version string.
#define VERSION "0.1.1"

// Compile-time layout switch: `#if OLD == 1` selects the alternative
// CHROMOSOME_ENTRY definition and the one-argument alloc_chromosome_entry()
// prototype further down in this header; any other value (0 here) selects
// the linked-list variant.
#define OLD 0 

// ##############################################################
// ####### GLOBAL VARIABLES #####################################
// ##############################################################

// Number of elements in the POWER table below.
// NOTE(review): INDEX_SIZE in this header is annotated as 4^13 while this
// limit is 12 - confirm that the two are meant to differ.
#define MAX_INDEX_DEPTH 12

// NOTE(review): every variable in this header is a tentative definition, not
// an `extern` declaration, so each translation unit that includes the header
// defines it. This only links when the toolchain merges common symbols
// (-fcommon); with -fno-common it produces "multiple definition" errors.
// The clean fix is `extern` here plus one definition in a .c file.

// Run-time option flags and limits. Their exact meaning is set where the
// command line is parsed, which is not visible here - the `char` variables
// presumably act as booleans; TODO confirm against init().
char ALL_HIT_STRATEGY;
int HITLEN_LIMIT;
char DEBUG;
int DEBUGREAD;
char VERBOSE;
char REV_IDX_EXISTS;
char MAP_REVERSE;
char HAS_SLOT;
int SLOT;
char REPEATMAP;
char STRINGENT_GAPLIMIT;

// Index seed parameters.
// presumably POWER[i] caches 4^i and BINARY_CODE holds one code per
// nucleotide (4 entries) - verify against the index-building code.
int INDEX_DEPTH;
int POWER[MAX_INDEX_DEPTH];
int BINARY_CODE[4];

// Counters describing the loaded reference (names suggest number of
// chromosomes and of indexed positions - TODO confirm).
unsigned int NUM_CHROMOSOMES;
unsigned int NUM_POS;

unsigned int LONGEST_HIT;

// ##############################################################
// ####### FILE HANDLING ########################################
// ##############################################################

// Fixed-size path buffers: each holds at most 499 characters plus the
// terminating NUL. The code that fills them is not visible in this header;
// NOTE(review): confirm that it bounds its copies to 500 bytes.
char INDEX_FILE_NAME[500];
char INDEX_REV_FILE_NAME[500];
char META_INDEX_FILE_NAME[500];
char QUERY_FILE_NAME[500];
char OUT_FILE_NAME[500];
char GENOME_FILE_NAME[500];
char LEFTOVER_FILE_NAME[500];
// Selector for the output format; the valid values are defined elsewhere.
char OUTPUT_FORMAT;

// Stream handles, one per path buffer above (matched by name).
FILE *GENOME_FP;
FILE *INDEX_FP;
FILE *INDEX_REV_FP;
FILE *META_INDEX_FP;
FILE *QUERY_FP;
FILE *OUT_FP;
FILE *LEFTOVER_FP;

// ##############################################################
// ####### MEMORY MANAGEMENT ####################################
// ##############################################################

// Size constant for the seed index: 67108864 == 4^13. The disabled line is
// the previous value, 16777216 == 4^12.
// NOTE(review): presumably the number of INDEX_ENTRY slots allocated for
// INDEX / INDEX_REV - confirm in alloc_index_memory().
//#define INDEX_SIZE 16777216 //4^12
#define INDEX_SIZE 67108864 //4^13

// A (slot, num) pair. The struct tag suggests this is one record of the meta
// index file - TODO confirm; if it is read or written as raw bytes, the
// field order and sizes are part of the file format and must not change.
typedef struct meta_idx_file_entry {
	int slot;
	unsigned int num;
} META_INDEX_ENTRY;

// Smallest unit handed out by the storage manager: four raw bytes.
// NOTE(review): presumably a packed position/block identifier - the encoding
// is not visible in this header.
typedef struct stmg_entry {
	unsigned char id[4];
} STORAGE_ENTRY;

// A genome coordinate: a position together with a chromosome number.
// Whether `pos` is 0- or 1-based is not visible here - TODO confirm.
typedef struct position_structure {
	unsigned int pos;
	unsigned int chr;
} POS;

// Array of POS records and an associated size. Allocation happens elsewhere;
// NOTE(review): confirm whether BLOCK_TABLE_SIZE counts entries or bytes.
POS *BLOCK_TABLE;
unsigned int BLOCK_TABLE_SIZE;

// Bookkeeping for a pool of STORAGE_ENTRY records: a bin count, a pointer to
// the first entry and a pointer to the next entry not yet handed out (going
// by the field names - the allocator itself is not visible here).
typedef struct stmg_str {
    unsigned int num_bins;
	STORAGE_ENTRY *first_entry;
    STORAGE_ENTRY *next_unused_entry;
} STORAGE;

// The single global storage pool instance.
STORAGE MEM_MGR;

// One slot of the seed index: a count and a pointer to the slot's last
// STORAGE_ENTRY.
// NOTE(review): presumably `num` is the number of positions stored for this
// seed - confirm in build_index().
typedef struct idx_entry {
	unsigned int num;
	STORAGE_ENTRY *last_entry;
} INDEX_ENTRY;

// Index tables; the names suggest forward strand and reverse strand.
INDEX_ENTRY *INDEX;
INDEX_ENTRY *INDEX_REV;

unsigned long int MAX_POSITIONS;

// ##############################################################
// ####### SHORT READ ###########################################
// ##############################################################

// State of the read currently being processed. The buffers are allocated
// elsewhere; MAX_READ_LENGTH and MAX_READ_ID_LENGTH are presumably their
// capacities - TODO confirm against the allocation code.
char *READ;
char *READ_ID;
// Three quality strings per read (what each of the three holds is not
// visible in this header).
char *READ_QUALITY[3];
int READ_PE_FLAG;
unsigned int READ_LENGTH;
#define MAX_READ_LENGTH 1000
#define MAX_READ_ID_LENGTH 100
//int READSTART[100000000];
int REDUNDANT;
// NOTE(review): presumably the current line number in the query file.
unsigned long int linenr;
char READ_FORMAT;	// 0: fq, 1: fa, 2: flat

// ##############################################################
// ####### ALIGNMENT ############################################
// ##############################################################

char *chrseq;	// for debugging
char *ALIGNSEQ;
// Upper bound on edit operations per hit; also the length of
// HIT.edit_op[] declared later in this header.
#define MAX_EDIT_OPS 10
int NUM_EDIT_OPS;
int NUM_MISMATCHES;
int NUM_GAPS;
// Two-dimensional tables of doubles (M) and chars (T).
// NOTE(review): presumably the dynamic-programming score matrix and its
// traceback matrix, with DIAGONAL/LEFT/UP below as the traceback codes
// stored in T - confirm in align.c.
double** M;
char** T;
// Scoring parameters (mismatch, match, gap open, gap extension by name).
double MM_SCORE;
double M_SCORE;
double GAP_SCORE;
double GAPEXT_SCORE;	//@TODO
#define DIAGONAL 'D'
#define LEFT 'L'
#define UP 'U'
char GAPS_MOST_RIGHT;
char OVERHANG_ALIGNMENT;
double WORST_SCORE;
double WORST_MM_SCORE;
char SCORES_OUT;

// ##############################################################
// ####### STATISTICS ###########################################
// ##############################################################

// Counters reported by print_stats() (going by the section title); the
// STATISTICS flag presumably enables their collection - TODO confirm.
char STATISTICS;
unsigned int PERFECT_READS;
unsigned int PERFECT_HITS;
unsigned int PERFECT_HITS_REV;
unsigned long int NUM_HITS;
// NOTE(review): 37 is a hard-coded bound (histogram by hit length, indices
// 0..36?) that is independent of MAX_READ_LENGTH - confirm that writers
// never index past it.
unsigned int HITS_LEN[37];
// Dynamically allocated histograms; their sizes are set at allocation time
// elsewhere.
unsigned int* HITS_MM;
unsigned int** HITS_READPOS;
unsigned int READS_MAPPED;
unsigned long int NUM_ALIGNMENTS;
unsigned long int NUM_WHOLE_ALIGNMENTS;
unsigned int ENDSTART_MAPPED[2];
unsigned int NOT_ALIGNED[2];
unsigned int NUM_READS;
unsigned int HITS_PER_READ;
unsigned long int GAPS_ENCOUNTERED[3];
unsigned long int TOO_MANY_MMS[2];
unsigned long int BREAK_GLOBAL_ALIGNMENT[2];
unsigned long int BREAK_TB_IN_GLOBAL_ALIGNMENT;
unsigned long int CELLS_GLOBAL;
unsigned long int CELLS_OVERHANG;
unsigned long int W;
unsigned int listcount;

// ##############################################################
// ####### GENOME ###############################################
// ##############################################################

// NOTE(review): presumably the buffer length for one chromosome description
// string - confirm where CHR_DESC entries are allocated.
#define CHR_DESC_LENGTH 50

// Arrays describing the reference: sequence, length and description.
// NOTE(review): presumably parallel, one element per chromosome - confirm
// in load_genome().
char** CHR_SEQ;
unsigned int* CHR_LENGTH;
char** CHR_DESC;
char** CHR_DESC_TMP;

// One edit operation recorded for a hit (HIT.edit_op[] holds up to
// MAX_EDIT_OPS of these).
//   pos - signed position of the operation; what the sign encodes is decided
//         by the alignment code, which is not visible in this header.
//   mm  - single-bit flag (by name: operation is a mismatch - TODO confirm).
typedef struct edit_op_structure {
	signed int pos;
	// Declared unsigned on purpose: whether a plain `int` bit-field is signed
	// is implementation-defined, and a signed 1-bit field can only represent
	// 0 and -1, so `mm = 1` would read back as -1 and `mm == 1` would never
	// be true. The storage unit and struct size are unchanged.
	unsigned int mm: 1;
} EDIT_OPS;

// One candidate mapping of a read onto the genome.
// A HIT sits in two lists at once: a doubly linked list through next/last
// and a singly linked per-score list through same_eo_succ.
typedef struct hit_structure {
	unsigned short int readpos;
	// Genome interval and chromosome of the hit (whether `end` is inclusive
	// is not visible here - TODO confirm).
	unsigned int start;
	unsigned int end;
	unsigned int chromosome;
	// Strand indicator; the encoding is defined by the code that fills it.
	char orientation;
	unsigned char mismatches;	// including gaps!
	unsigned char gaps;
	signed char start_offset;
	signed char end_offset;
	// Fixed-capacity record of the edit operations of this hit.
	EDIT_OPS edit_op[MAX_EDIT_OPS];
	struct hit_structure *same_eo_succ;	// the list of HITS_BY_SCORE - only forward pointer for now
	struct hit_structure *next;
	struct hit_structure *last;
} HIT;

// Head of one score bucket: a pointer to a HIT and a count.
// NOTE(review): presumably `num` is the length of the list reached via
// HIT.same_eo_succ - confirm in insert_into_scorelist().
typedef struct hits_by_score_structure {
	HIT *hitpointer;
	int num;
} HITS_BY_SCORE_STRUCT;

// Width of one score bucket and the number of buckets.
#define SCORE_INTERVAL 1
int NUM_SCORE_INTERVALS;

// Arrays of HIT list heads and the score-bucket table; they are allocated
// by alloc_hit_lists_operator() / alloc_hits_by_score() judging by the
// prototype names - TODO confirm.
HIT **HIT_LISTS_OPERATOR;
HIT **READSTART_BINS;
HITS_BY_SCORE_STRUCT *HITS_BY_SCORE;
unsigned int HITS_IN_SCORE_LIST;

// Node of a doubly linked list (pred/succ) that ties a read position to the
// HIT it belongs to. CHROMOSOME_ENTRY.mapping_entries points at such nodes.
typedef struct mapping_entry_structure {
	unsigned int readpos;
	HIT *hit;
	struct mapping_entry_structure *pred;
	struct mapping_entry_structure *succ;
} MAPPING_ENTRY;

// Per-genome-position record that anchors the mapping entries found there.
// Two layouts exist, selected by the OLD macro.
#if OLD == 1
// Variant for OLD == 1: an array of MAPPING_ENTRY pointers plus the position.
typedef struct chromosome_entry {
	struct mapping_entry_structure **mapping_entries;
	unsigned int genome_pos;
} CHROMOSOME_ENTRY;
#else
// Default variant: self-describing (chromosome, position, strand) and
// chained through `next`, with a single pointer to its mapping entries.
typedef struct chromosome_entry {

	int chromosome; // @TODO? Minus values indicate reverse hits (would save strand var = 1 byte, ~15MB)
	unsigned int genome_pos;
	char strand;

	struct chromosome_entry *next;
	MAPPING_ENTRY *mapping_entries;

	// It seems to be cheaper to store the back-pointer information (pos)
	// in each of these entries rather than having a superior structure.
} CHROMOSOME_ENTRY;
#endif

// Table of CHROMOSOME_ENTRY pointers; how it is indexed is decided by the
// allocation code, which is not visible here - TODO confirm.
CHROMOSOME_ENTRY **GENOME;

unsigned int LONGEST_CHROMOSOME;

// ##############################################################
// ####### MEMORY CONTAINER #####################################
// ##############################################################

// Number of records embedded in each MAPPING_ENTRY_CONTAINER and
// HIT_CONTAINER below.
#define CONTAINER_SIZE 100000
// NOTE(review): presumably the length of CHROMOSOME_ENTRY_CONTAINER.entries,
// chosen at run time - confirm in alloc_chromosome_entry_container().
unsigned int CHROM_CONTAINER_SIZE;

// Pool block holding CONTAINER_SIZE MAPPING_ENTRY records inline, a `used`
// counter and a link to the next block. Because the array is embedded, one
// container is sizeof(MAPPING_ENTRY) * 100000 bytes plus bookkeeping, so it
// should live on the heap, not the stack.
typedef struct mapping_entry_container_structure {
	struct mapping_entry_structure entries[CONTAINER_SIZE];
	unsigned int used;
	struct mapping_entry_container_structure *next;
} MAPPING_ENTRY_CONTAINER;

// Same pool-block scheme for HIT records.
typedef struct hit_container_structure {
	HIT entries[CONTAINER_SIZE];
	unsigned int used;
	struct hit_container_structure *next;
} HIT_CONTAINER;

// Pool for CHROMOSOME_ENTRY records. Unlike the two containers above, the
// records are reached through a pointer and there is no `next` link.
typedef struct chromosome_entry_container_structure {
	CHROMOSOME_ENTRY* entries;
	unsigned int used;
} CHROMOSOME_ENTRY_CONTAINER;

// Global pool handles. For the chained pools, *_FIRST is presumably the
// head of the chain and the plain name the block currently being filled -
// TODO confirm in alloc.c.
MAPPING_ENTRY_CONTAINER* MAPPING_ENTRY_OPERATOR_FIRST;
MAPPING_ENTRY_CONTAINER* MAPPING_ENTRY_OPERATOR;
HIT_CONTAINER* HIT_OPERATOR_FIRST;
HIT_CONTAINER* HIT_OPERATOR;
CHROMOSOME_ENTRY_CONTAINER* CHROMOSOME_ENTRY_OPERATOR;

unsigned int MAX_USED_SLOTS;
unsigned int NUM_MAPPING_ENTRIES;

// ##############################################################
// ####### ROUTINES #############################################
// ##############################################################

// Prototypes are grouped by the source file named in the comment above each
// group; the definitions are not visible in this header.
// NOTE(review): declarations written with empty parentheses, e.g.
// `int usage();`, leave the parameter list unspecified before C23, so the
// compiler does not check call arguments. `(void)` would be stricter, but
// first confirm that each definition really takes no parameters.

// genomemapper.c
int map_reads();

// usage.c
int usage();

// init.c
int init(int argc, char *argv[]);
int init_from_meta_index();

// index.c
int build_index();

// alloc.c
int alloc_index_memory();
int alloc_genome_memory();
MAPPING_ENTRY* alloc_mapping_entry();
// The signature follows the CHROMOSOME_ENTRY layout selected by OLD.
#if OLD == 1
CHROMOSOME_ENTRY* alloc_chromosome_entry(unsigned int *pos);
#else
CHROMOSOME_ENTRY* alloc_chromosome_entry(unsigned int *pos, unsigned int *chr, char *strand);
#endif
HIT* alloc_hit();
int alloc_hit_lists_operator();
int alloc_hits_by_score();
MAPPING_ENTRY_CONTAINER* alloc_mapping_entry_container();
CHROMOSOME_ENTRY_CONTAINER* alloc_chromosome_entry_container();
HIT_CONTAINER* alloc_hit_container();
int dealloc_mapping_entries(); 
int dealloc_hits();
int dealloc_chromosome_entries();
int dealloc_hit_lists_operator();
int dealloc_hits_by_score();

//load.c
int load_genome();

//read.c
int read_short_read();
unsigned int map_fast();
int map_short_read(unsigned int num, int slot);

//hit.c
// NOTE(review): map_reads() is already declared at the top of this list; the
// repeated declaration is harmless, but only one source file can define it.
int map_reads();
int seed2genome(unsigned int num, unsigned int slot, unsigned int readpos, char reverse);
int insert_into_scorelist(HIT* hit, char d);
char *get_seq(unsigned int n);

//align.c
int align_hit_simple(HIT* hit);
int prepare_kbound_alignment (HIT* hit);
char unique_base(char c);

//print.c
char get_compl_base(char c);
void print_stats();
int print_alignment(HIT* hit, unsigned int num);
int print_hits();
int print_perfect_hits(unsigned int num);
void print_leftovers();
void printhits();
void printhit(HIT* hit);
void print_alignment_matrix(int chrstart, int readstart, int length, int offset_front, int offset_end, int chr, char ori, int K);

#endif
