// Authors: Korbinian Schneeberger, Stephan Ossowski and Joerg Hagmann
// Copyright (C) 2008 by Max-Planck Institute for Developmental Biology, Tuebingen, Germany


#ifndef GENOMEMAPPER_H_
#define GENOMEMAPPER_H_

#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
//SG
#include <unistd.h>
#include <pthread.h>
//SG
#ifdef CUDA 
  #include <cuda.h>
  #include <cuda_runtime_api.h>
#endif

// Version identifier of this code base, as a string literal.
#define VERSION "0.4.0"

// ##############################################################
// ####### GLOBAL VARIABLES #####################################
// ##############################################################

// NOTE(review): the variables below are *defined* here (no `extern`), so every
// translation unit that includes this header emits its own definition. In C
// this only links when the toolchain merges common symbols (e.g. GCC with
// -fcommon; GCC >= 10 defaults to -fno-common), and it is a multiple-definition
// error when compiled as C++. Confirm the build flags before changing these.

// Number of entries in POWER[]; presumably the largest supported index depth.
#define MAX_INDEX_DEPTH 13

//SG
// CUDA-only settings; compiled in only when CUDA is defined.
#ifdef CUDA
int DEVICE_COUNT;
// 16384 = 16 * 1024; presumably a shared-memory budget in bytes -- TODO confirm.
#define SHARED_MEM 16384
#endif

// Run-time options. The char-typed ones are presumably boolean flags set
// during initialisation -- verify against init.c.
char ALL_HIT_STRATEGY;
int HITLEN_LIMIT;
char VERBOSE;
char REV_IDX_EXISTS;
char MAP_REVERSE;
char HAS_SLOT;
int SLOT;
char REPEATMAP;
char STRINGENT_GAPLIMIT;
int PRINT_SEQ;

// Index parameters: POWER holds MAX_INDEX_DEPTH ints, BINARY_CODE holds 4.
// NOTE(review): presumably powers of 4 and the 2-bit codes of the four
// nucleotides -- confirm where they are filled in.
int INDEX_DEPTH;
int POWER[MAX_INDEX_DEPTH];
int BINARY_CODE[4];

unsigned int NUM_CHROMOSOMES;

unsigned int LONGEST_HIT;

// ##############################################################
// ####### FILE HANDLING ########################################
// ##############################################################

// File names are kept in fixed 500-byte buffers.
// Without METHYLOME there is one forward index, one reverse index and one
// meta index file name; with METHYLOME each of the three exists in a CT and
// a GA variant instead.
char CHR_INDEX_FILE_NAME[500];
#ifndef METHYLOME
char INDEX_FWD_FILE_NAME[500];
char INDEX_REV_FILE_NAME[500];
char META_INDEX_FILE_NAME[500];
#else
char INDEX_FWD_CT_FILE_NAME[500];
char INDEX_REV_CT_FILE_NAME[500];
char META_INDEX_CT_FILE_NAME[500];
char INDEX_FWD_GA_FILE_NAME[500];
char INDEX_REV_GA_FILE_NAME[500];
char META_INDEX_GA_FILE_NAME[500];
#endif
char QUERY_FILE_NAME[500];
char OUT_FILE_NAME[500];
char SPLICED_OUT_FILE_NAME[500];
char GENOME_FILE_NAME[500];
char LEFTOVER_FILE_NAME[500];
char OUTPUT_FORMAT;

// stdio streams; the meta index stream is split into CT/GA variants when
// METHYLOME is defined, mirroring the file names above.
FILE *GENOME_FP;
FILE *CHR_INDEX_FP;
#ifndef METHYLOME
FILE *META_INDEX_FP;
#else
FILE *META_INDEX_CT_FP;
FILE *META_INDEX_GA_FP;
#endif
FILE *QUERY_FP;
FILE *OUT_FP;
FILE *SP_OUT_FP;
FILE *LEFTOVER_FP;

// ##############################################################
// ####### MEMORY MANAGEMENT ####################################
// ##############################################################

// Size constant for the index: 67108864 = 4^13 (the disabled alternative is
// 16777216 = 4^12).
// NOTE(review): the exponent equals MAX_INDEX_DEPTH (13) in this header, but
// the two are not tied together -- presumably they must be changed in step.
//#define INDEX_SIZE 16777216 //4^12
#define INDEX_SIZE 67108864 //4^13

// One record of the meta index: a signed slot number and an unsigned count.
// NOTE(review): presumably mirrors the on-disk meta index record layout --
// confirm before reordering or resizing the members.
struct meta_idx_file_entry {
	int slot;
	unsigned int num;
};
typedef struct meta_idx_file_entry META_INDEX_ENTRY;

// Four raw bytes per stored index entry.
// NOTE(review): presumably a packed position identifier -- confirm the
// encoding where the index is built and read.
struct stmg_entry {
	unsigned char id[4];
};
typedef struct stmg_entry STORAGE_ENTRY;

// A genomic coordinate: position `pos` on chromosome number `chr`.
struct position_structure {
	unsigned int pos, chr;
};
typedef struct position_structure POS;

// Table(s) of POS records. One table without METHYLOME, separate CT and GA
// tables with it. BLOCK_TABLE_SIZE is shared by both configurations.
#ifndef METHYLOME
POS *BLOCK_TABLE;
#else
POS *BLOCK_TABLE_CT;
POS *BLOCK_TABLE_GA;
#endif
unsigned int BLOCK_TABLE_SIZE;

//typedef struct stmg_str {
//    unsigned int num_bins;
//    STORAGE_ENTRY *first_entry;
//    STORAGE_ENTRY *next_unused_entry;
//} STORAGE;

//STORAGE MEM_MGR;

// In-memory index bucket: an entry count plus an offset.
// NOTE(review): `offset` presumably indexes into the mmap'ed STORAGE_ENTRY
// array -- confirm against the index loading code.
struct idx_entry {
	unsigned int num;
	//STORAGE_ENTRY *last_entry;
	unsigned int offset;
};
typedef struct idx_entry INDEX_ENTRY;

// Index tables and the STORAGE_ENTRY arrays behind them.
// Without METHYLOME: one forward and one reverse index.
// With METHYLOME: forward and reverse indices for both the CT and the GA
// variant.
// NOTE(review): the *_MMAP names suggest memory-mapped index files -- confirm.
#ifndef METHYLOME

INDEX_ENTRY *INDEX;
INDEX_ENTRY *INDEX_REV;

STORAGE_ENTRY *INDEX_REV_MMAP;
STORAGE_ENTRY *INDEX_FWD_MMAP;

#else 

INDEX_ENTRY *INDEX_CT;
INDEX_ENTRY *INDEX_REV_CT;
INDEX_ENTRY *INDEX_GA;
INDEX_ENTRY *INDEX_REV_GA;

STORAGE_ENTRY *INDEX_REV_CT_MMAP;
STORAGE_ENTRY *INDEX_FWD_CT_MMAP;
STORAGE_ENTRY *INDEX_REV_GA_MMAP;
STORAGE_ENTRY *INDEX_FWD_GA_MMAP;

#endif


unsigned long int MAX_POSITIONS;

// ##############################################################
// ####### SHORT READ ###########################################
// ##############################################################

// State of the read currently being processed: sequence, identifier and up to
// three quality strings, all held through pointers (allocation is not done in
// this header).
char *READ;
char *READ_ID;
char *READ_QUALITY[3];
int READ_PE_FLAG;
unsigned int READ_LENGTH;
// Length limits for a read and for its identifier.
#define MAX_READ_LENGTH 1000
#define MAX_READ_ID_LENGTH 100
//int READSTART[100000000];
int REDUNDANT;
unsigned long int linenr;
char READ_FORMAT;	// 0: fq, 1: fa, 2: flat

// FLANK_SEQ can hold MAX_READ_LENGTH + 200 chars.
// NOTE(review): the 200 is presumably room for flanking sequence on both
// sides plus the terminator -- confirm how FLANKING is bounded.
unsigned int FLANKING;
char FLANK_SEQ[MAX_READ_LENGTH + 200];

// ##############################################################
// ####### ALIGNMENT ############################################
// ##############################################################

char *chrseq;	// for debugging
char *ALIGNSEQ;
// Number of EDIT_OPS slots in every HIT.
#define MAX_EDIT_OPS 10
int NUM_EDIT_OPS;
int NUM_MISMATCHES;
int NUM_GAPS;
// M and T gain one extra level of indirection when THREADS is defined.
// NOTE(review): presumably M is the score matrix and T the traceback matrix,
// with one pair per thread under THREADS -- confirm in align.c.
#ifdef THREADS
	unsigned char*** M;
	char*** T;
#else
	unsigned char** M;
	char** T;
#endif
unsigned char MM_SCORE;
unsigned char M_SCORE;
unsigned char GAP_SCORE;
// Single-character direction codes; presumably the values stored in T.
#define DIAGONAL 'D'
#define LEFT 'L'
#define UP 'U'
char GAPS_MOST_RIGHT;
char OVERHANG_ALIGNMENT;
unsigned char WORST_SCORE;
unsigned char WORST_MM_SCORE;
char SCORES_OUT;

// ##############################################################
// ####### SPLICED HITTING ######################################
// ##############################################################

// Parameters of the spliced-hit search. All are plain ints; their units and
// defaults are not set in this header (presumably in init.c -- confirm).
int SPLICED_HITS;
int SPLICED_HIT_MIN_LENGTH_SHORT;
int SPLICED_HIT_MIN_LENGTH_COMB;
int SPLICED_HIT_MIN_LENGTH_LONG;
int SPLICED_HIT_MAX_DIST;
int SPLICED_LONGEST_HIT_MIN_LENGTH;

// ##############################################################
// ####### STATISTICS ###########################################
// ##############################################################

// Only READS_MAPPED and NUM_READS are live; every other statistics counter in
// this section is commented out.
//char STATISTICS;
//unsigned int PERFECT_READS;
//unsigned int PERFECT_HITS;
//unsigned int PERFECT_HITS_REV;
//unsigned long int NUM_HITS;
//unsigned int HITS_LEN[37];
//unsigned int* HITS_MM;
//unsigned int** HITS_READPOS;
unsigned int READS_MAPPED;
//unsigned long int NUM_ALIGNMENTS;
//unsigned long int NUM_WHOLE_ALIGNMENTS;
//unsigned int ENDSTART_MAPPED[2];
//unsigned int NOT_ALIGNED[2];
unsigned int NUM_READS;
//unsigned int HITS_PER_READ;
//unsigned long int GAPS_ENCOUNTERED[3];
//unsigned long int TOO_MANY_MMS[2];
//unsigned long int BREAK_GLOBAL_ALIGNMENT[2];
//unsigned long int BREAK_TB_IN_GLOBAL_ALIGNMENT;
//unsigned long int CELLS_GLOBAL;
//unsigned long int CELLS_OVERHANG;
//unsigned long int W;
//unsigned int listcount, listocc;

// ##############################################################
// ####### GENOME ###############################################
// ##############################################################

// Length constant for chromosome descriptions.
// NOTE(review): presumably the buffer size of each CHR_DESC entry -- confirm
// where the descriptions are allocated.
#define CHR_DESC_LENGTH 50

// Per-chromosome arrays: sequence, length and description. They are pointers
// only; allocation and indexing happen outside this header (presumably by
// chromosome number, NUM_CHROMOSOMES entries -- confirm).
char** CHR_SEQ;
unsigned int* CHR_LENGTH;
char** CHR_DESC;
char** CHR_DESC_TMP;

// One edit operation of an alignment.
//   pos  signed position of the operation.
//        NOTE(review): the sign presumably encodes the kind of gap -- confirm
//        in align.c / print.c.
//   mm   one-bit flag (0 or 1).
//
// `mm` is declared unsigned on purpose: whether a plain `int` bit-field is
// signed is implementation-defined, and as a signed 1-bit field it can only
// hold 0 and -1, so `mm = 1` would read back as -1 and `mm == 1` would never
// be true. Size and layout of the struct are unchanged.
typedef struct edit_op_structure {
	signed int pos;
	unsigned int mm: 1;
} EDIT_OPS;

// A hit of the current read on the genome.
// Holds the read position, the start/end coordinates and chromosome, up to
// MAX_EDIT_OPS edit operations, orientation, mismatch/gap counts and the
// links used to chain hits into lists. The `conversion` member exists only
// when METHYLOME is defined.
struct hit_structure {
	unsigned short int readpos;
	unsigned int start, end;
	unsigned int chromosome;
	EDIT_OPS edit_op[MAX_EDIT_OPS];
	char orientation;
	unsigned char mismatches, gaps;	// mismatches counts gaps as well!
	signed char start_offset, end_offset;
#ifdef METHYLOME
	unsigned char conversion;
#endif
	char aligned; // Should this be only with THREADS?
	// the list of HITS_BY_SCORE - only forward pointer for now
	struct hit_structure *same_eo_succ;
	struct hit_structure *next, *last;
};
typedef struct hit_structure HIT;

#ifdef THREADS
// Alignment parameters handed to the alignment routines in THREADS builds
// (see the THREADS prototypes of align_hit_simple and
// prepare_kbound_alignment). The members are lower-case counterparts of the
// global settings with the same names.
struct thread_const_structure {
	int num_gaps, num_mismatches;
	unsigned int read_length, chr_length;
	char all_hit_strategy, overhang_alignment;
	char gaps_most_right, stringent_gaplimit;
	unsigned char mm_score, m_score, gap_score;
	unsigned char worst_score, worst_mm_score;
};
typedef struct thread_const_structure THREAD_CONST;

// Per-thread record: the thread number, a hit-length range, pointers to
// hit-depth start/end values and the THREAD_CONST parameters for the thread.
// NOTE(review): the ranges presumably describe the share of hits a worker
// processes -- confirm in the thread set-up code.
struct thread_structure {
	int num_thread;
	int hitlength_start, hitlength_end;
	int *hitdepth_start, *hitdepth_end;
	THREAD_CONST *thread_const;
};
typedef struct thread_structure THREAD_DATA;

// pthread state, available only in THREADS builds.
// NOTE(review): thread_data, THREAD_STARTED, tid, start_align_mutex and
// start_align_cond are pointers, presumably arrays with one element per
// thread (NUM_THREADS) -- confirm at the allocation site.
THREAD_DATA** thread_data;
int* THREAD_STARTED;
int THREADS_FINISHED;
pthread_t* tid;
pthread_attr_t attr;
// Mutexes and condition variables; the names suggest which data each one
// guards, but the locking discipline is not visible in this header.
pthread_mutex_t num_edit_ops_mutex;
pthread_mutex_t hits_into_score_mutex;
pthread_mutex_t *start_align_mutex;
pthread_cond_t *start_align_cond;
pthread_mutex_t aligned_mutex;
pthread_cond_t aligned_cond;
int NUM_THREADS;
int NEW_READ;
unsigned long int *HITS_NUM_PER_LENGTH;
unsigned long int NUM_HITS;

#endif 

// One score bin: a pointer to a HIT and a count.
// NOTE(review): presumably the head of a list chained through
// HIT.same_eo_succ and the number of hits in it -- confirm in hit.c.
struct hits_by_score_structure {
	HIT *hitpointer;
	int num;
};
typedef struct hits_by_score_structure HITS_BY_SCORE_STRUCT;

// Score bin width and number of bins.
// NOTE(review): presumably HITS_BY_SCORE has NUM_SCORE_INTERVALS entries of
// width SCORE_INTERVAL -- confirm in alloc_hits_by_score().
#define SCORE_INTERVAL 1
int NUM_SCORE_INTERVALS;

// Hit bookkeeping: arrays of HIT pointers, the score bins and a hit counter.
HIT **HIT_LISTS_OPERATOR;
HIT **READSTART_BINS;
HITS_BY_SCORE_STRUCT *HITS_BY_SCORE;
unsigned int HITS_IN_SCORE_LIST;

// Node of a doubly linked list (pred/succ) that ties a read position to the
// HIT it belongs to.
struct mapping_entry_structure {
	unsigned int readpos;
	HIT *hit;
	struct mapping_entry_structure *pred, *succ;
};
typedef struct mapping_entry_structure MAPPING_ENTRY;

// A genome position (chromosome, position, strand) together with its mapping
// entries; entries are chained singly through `next`.
struct chromosome_entry {
	// @TODO? Minus values indicate reverse hits (would save strand var = 1 byte, ~15MB)
	int chromosome;
	unsigned int genome_pos;
	char strand;

	struct chromosome_entry *next;
	MAPPING_ENTRY *mapping_entries;

	// It seems to be cheaper to store the back-pointer information (pos)
	// in each of these entries rather than having a superior structure.
};
typedef struct chromosome_entry CHROMOSOME_ENTRY;

// Array of CHROMOSOME_ENTRY pointers.
// NOTE(review): presumably indexed by genome position -- confirm in alloc.c.
CHROMOSOME_ENTRY **GENOME;

unsigned int LONGEST_CHROMOSOME;

// ##############################################################
// ####### MEMORY CONTAINER #####################################
// ##############################################################

// Number of entries per MAPPING_ENTRY_CONTAINER and per HIT_CONTAINER.
#define CONTAINER_SIZE 100000
// Run-time size for the chromosome entry container, whose `entries` member is
// a pointer rather than a fixed array (presumably its allocated length --
// confirm in alloc.c).
unsigned int CHROM_CONTAINER_SIZE;

// Pool of CONTAINER_SIZE mapping entries plus a usage counter; pools are
// chained through `next`.
struct mapping_entry_container_structure {
	struct mapping_entry_structure entries[CONTAINER_SIZE];
	unsigned int used;
	struct mapping_entry_container_structure *next;
};
typedef struct mapping_entry_container_structure MAPPING_ENTRY_CONTAINER;

// Pool of CONTAINER_SIZE HIT records plus a usage counter; pools are chained
// through `next`.
struct hit_container_structure {
	HIT entries[CONTAINER_SIZE];
	unsigned int used;
	struct hit_container_structure *next;
};
typedef struct hit_container_structure HIT_CONTAINER;

// Pool of chromosome entries. Unlike the other two containers, the storage is
// reached through a pointer and there is no `next` link.
struct chromosome_entry_container_structure {
	CHROMOSOME_ENTRY *entries;
	unsigned int used;
};
typedef struct chromosome_entry_container_structure CHROMOSOME_ENTRY_CONTAINER;

// Container handles.
// NOTE(review): the *_FIRST pointers presumably keep the head of each
// container chain while the un-suffixed ones track the container currently
// being filled -- confirm in alloc.c.
MAPPING_ENTRY_CONTAINER* MAPPING_ENTRY_OPERATOR_FIRST;
MAPPING_ENTRY_CONTAINER* MAPPING_ENTRY_OPERATOR;
HIT_CONTAINER* HIT_OPERATOR_FIRST;
HIT_CONTAINER* HIT_OPERATOR;
CHROMOSOME_ENTRY_CONTAINER* CHROMOSOME_ENTRY_OPERATOR;

unsigned int MAX_USED_SLOTS;
unsigned int NUM_MAPPING_ENTRIES;

// ##############################################################
// ####### ROUTINES #############################################
// ##############################################################

// Prototypes are grouped by the source file that implements them.
// NOTE(review): in C before C23 an empty parameter list `()` declares a
// function with unspecified parameters, not `(void)`, so calls are not
// argument-checked. Left as is because the definitions are not visible here.

// genomemapper.c
//SG
char check_compile_options();
//int map_reads();

// usage.c
int usage();

// init.c
int init(int argc, char *argv[]);
int init_from_meta_index();

// index.c
int build_index();

// alloc.c
// alloc_index_memory takes an extra `conversion` argument in METHYLOME builds.
#ifndef METHYLOME
int alloc_index_memory();
#else
int alloc_index_memory(int conversion);
#endif
int alloc_genome_memory();
MAPPING_ENTRY* alloc_mapping_entry();
CHROMOSOME_ENTRY* alloc_chromosome_entry(unsigned int *pos, unsigned int *chr, char *strand);
HIT* alloc_hit();
int alloc_hit_lists_operator();
int alloc_hits_by_score();
MAPPING_ENTRY_CONTAINER* alloc_mapping_entry_container();
CHROMOSOME_ENTRY_CONTAINER* alloc_chromosome_entry_container();
HIT_CONTAINER* alloc_hit_container();
int dealloc_mapping_entries(); 
int dealloc_hits();
int dealloc_chromosome_entries();
int dealloc_hit_lists_operator();
int dealloc_hits_by_score();

//load.c
int load_genome();

//read.c
int read_short_read();
unsigned int map_fast();
// map_short_read takes an extra `conversion` argument in METHYLOME builds.
#ifndef METHYLOME
int map_short_read(unsigned int num, int slot);
#else
int map_short_read(unsigned int num, int slot, int conversion);
#endif

//hit.c
int map_reads();
// seed2genome takes an extra `conversion` argument in METHYLOME builds.
#ifndef METHYLOME
int seed2genome(unsigned int num, unsigned int slot, unsigned int readpos, char reverse);
#else
int seed2genome(unsigned int num, unsigned int slot, unsigned int readpos, char reverse, int conversion);
#endif
int insert_into_scorelist(HIT* hit, char d);
char *get_seq(unsigned int n);
int browse_hits();
//SG
char unique_base(char c);

//align.c
//SG
// Three mutually exclusive variants of the alignment entry points:
//   CUDA     - single-argument functions plus init_cuda(), declared extern;
//   THREADS  - functions additionally take the THREAD_CONST parameters and
//              the thread number, plus the prepare_align thread routine;
//   neither  - single-argument functions.
// CUDA takes precedence when both CUDA and THREADS are defined.
#ifdef CUDA
extern int init_cuda();
extern int align_hit_simple(HIT* hit);
extern int prepare_kbound_alignment (HIT* hit);
#else
#ifdef THREADS
void *prepare_align(void *arg);
int align_hit_simple(HIT* hit, THREAD_CONST* thread_const, int num_thread);
int prepare_kbound_alignment(HIT* hit, THREAD_CONST* thread_const, int num_thread);
#else
int align_hit_simple(HIT* hit);
int prepare_kbound_alignment (HIT* hit);
#endif
#endif
//print.c
// Output and reporting routines.
char get_compl_base(char c);
void print_stats();
int print_alignment(HIT* hit, unsigned int num);
int print_hits();
int print_perfect_hits(unsigned int num);
int print_spliced_hits();
int print_largest_hit();
int comp_hits_4_splicing();
void print_leftovers();
void printhits();
void printhit(HIT* hit);
void print_alignment_matrix(int chrstart, int readstart, int length, int offset_front, int offset_end, int chr, char ori, int K);
// Comparator with the signature expected by qsort()/bsearch().
int compare (const void *a, const void *b);

#endif
