#!/usr/bin/python
import sys
import numpy as np
import math
import os

def ensure_dir(f):
    """Make sure the directory that would contain the path `f` exists.

    `f` is a file path; only its directory part (os.path.dirname) is
    created, together with any missing parents. Nothing is done when `f`
    has no directory part or when that path already exists.

    Raises OSError if the directory cannot be created.
    """
    d = os.path.dirname(f)
    # A bare file name has an empty dirname and os.makedirs("") would raise.
    if not d or os.path.exists(d):
        return
    try:
        os.makedirs(d)
    except OSError:
        # Something else may have created the directory between the check
        # above and the call; only a directory that is still missing is an error.
        if not os.path.isdir(d):
            raise

def binning(low, step, x):
	"""Return the zero-based index of the bin of width `step` that holds `x`.

	Bins start at `low`, so bin k covers [low + k*step, low + (k+1)*step).
	The index is returned as a NumPy float (the result of np.floor); values
	below `low` give negative indices.
	"""
	# The quotient suffers from binary rounding: (2.3 - 2.0)/0.1 evaluates to
	# 2.9999999999999982, which a bare floor would put into bin 2 instead of 3.
	# Rounding to 9 decimals first snaps such values onto the bin edge they
	# actually sit on.
	return np.floor(np.round((x - low)/step, 9))

def vol(i, j, mini, maxi):
	"""Return the weight used to normalise histogram cell (distance bin i, angle bin j).

	Distance bin i spans [2.0 + i*0.1, 2.0 + (i+1)*0.1]; the angle range
	[mini, maxi] is cut into 100 equal bins and bin j is the j-th of them.
	The result is 2*pi * (d_max**3 - d_min**3) * (cos(a_min) - cos(a_max)).

	NOTE(review): the angles go straight into math.cos, i.e. they are treated
	as radians - confirm that matches the input data. The geometric volume of
	such a shell sector would carry an additional factor 1/3; here it is left
	out, which only rescales every cell by the same constant.
	"""
	inner = 2.0 + i*0.1
	outer = 2.0 + (i+1)*0.1
	angle_from = mini + j*(maxi-mini)/100
	angle_to = mini + (1+j)*(maxi-mini)/100
	radial = outer**3 - inner**3
	angular = math.cos(angle_from) - math.cos(angle_to)
	return math.pi*2*radial*angular

def name_conversion(protein):
	"""Return `protein` with all upper-case letters converted to lower case.

	Digits and every other character are passed through unchanged, so an
	identifier that is already (partly) lower case is accepted as well
	instead of failing with a KeyError.
	"""
	return protein.lower()
		
#report file: accepted clusters and the number of sequences accepted, one entry per line
with open("./hb_seq/report.txt") as f:
	content = f.read().splitlines()

#distance range and bin width
min_dist, max_dist, step_dist = 2.0, 6.0, 0.1

#angle limits are read from a support file; every angle range is cut into 100 equal bins
with open("./support/maxminangles.txt") as g:
	maxminangles = g.read().splitlines()

#acceptor angle: limits are lines 5 and 6 of the file (indices 4 and 5), rounded to 4 decimals
min_angle_ac, max_angle_ac = (round(float(maxminangles[n]), 4) for n in (4, 5))
step_angle_ac = (max_angle_ac - min_angle_ac)/100

#donor angle: limits are lines 2 and 3 of the file (indices 1 and 2), rounded to 4 decimals
min_angle_do, max_angle_do = (round(float(maxminangles[n]), 4) for n in (1, 2))
step_angle_do = (max_angle_do - min_angle_do)/100

#classification of donors and acceptors
#each atom name (padded to three characters) maps to its position in the list below,
#which is the index used along the last axis of the count matrices
dict_don = dict((atom, idx) for idx, atom in enumerate((
	"N  ", "SG ", "OD1", "OD2", "OE1", "OE2", "ND1", "NE2", "NZ ", "ND2",
	"NE ", "NH1", "NH2", "OG ", "OG1", "OH ", "NE1", "CA ", "CB ", "CD ",
	"CD1", "CD2", "CE ", "CE1", "CE2", "CE3", "CG ", "CG1", "CG2", "CH2",
	"CZ ", "CZ2", "CZ3", "C  ",
)))

dict_acc = dict((atom, idx) for idx, atom in enumerate((
	"O  ", "SG ", "OD1", "OD2", "OE1", "OE2", "ND1", "NE2", "NZ ", "SD ",
	"NE ", "NH1", "NH2", "OG ", "OG1", "OH ", "X1 ", "X2 ", "X3 ", "X4 ",
	"X5 ", "N  ", "ND2", "CE1", "CD2",
)))

#count matrices, every one addressed as M[angle bin][distance bin][atom type]
#100 angle bins x 40 distance bins; 34 donor types, 25 acceptor types
#D:  donors by donor angle         Da: donors by acceptor angle
D = [[[0]*34 for _ in range(0,40)] for _ in range(0,100)]
Da = [[[0]*34 for _ in range(0,40)] for _ in range(0,100)]

#A:  acceptors by acceptor angle   Ad: acceptors by donor angle
A = [[[0]*25 for _ in range(0,40)] for _ in range(0,100)]
Ad = [[[0]*25 for _ in range(0,40)] for _ in range(0,100)]

#output directory tree: hb_sta/<table kind>/<atom name>
#os.makedirs raises if a directory is already there, so this expects a fresh hb_sta
for kind in ("acc", "accD", "don", "donA"):
	os.makedirs("hb_sta/" + kind)
#acceptor tables get one folder per acceptor atom, donor tables one per donor atom
for kinds, atoms in ((("acc", "accD"), dict_acc), (("don", "donA"), dict_don)):
	for kind in kinds:
		for atom in atoms:
			os.makedirs("hb_sta/" + kind + "/" + atom)

#let's go through hydrogen bonds files
def _record_bond(counts, angle_bin, dist_bin, atom_idx, path, record):
	"""Count one bond in counts[angle_bin][dist_bin][atom_idx] and append `record` to the file `path`."""
	counts[angle_bin][dist_bin][atom_idx] += 1
	#the with-block closes the file even if the write fails
	with open(path, "a") as save_file:
		save_file.write(record)

#histogram dimensions, taken from the count matrices themselves
n_angle_bins = len(D)
n_dist_bins = len(D[0])

for cluster in content:
	#the first seven characters of a report line are parsed as the cluster number
	with open("./hb_seq/"+str(int(cluster[:7]))+"_a.fa") as s:
		seq = s.read().splitlines()
	#the file is read as pairs of lines; the first line of each pair is the name line
	#floor division keeps the pair count an integer on Python 2 and Python 3 alike
	for j in range(0, len(seq)//2):
		name = seq[2*j]
		print("working on "+name)
		#name[1:5] selects the hydrogen-bond file, name[6] the chain to collect
		with open("./hb_res/NHB_"+name_conversion(name[1:5])+".txt") as h:
			hb = h.read().splitlines()
		#find the "LIST " line, probing every fourth line starting at line index 1;
		#the records start on the line after it
		#NOTE(review): a file without such a line ends in an IndexError - confirm the format guarantees it
		i=1
		while hb[i][0:5]!="LIST ":
			i=i+4
		i=i+1

		#check appropriate chain
		#skip records until the wanted chain shows up or a line starting with "AM" is reached
		while hb[i][4]!=name[6] and hb[i][0:2]!="AM":
			i=i+1

		#while appropriate chain
		while hb[i][4]==name[6] and hb[i][0:2]!="AM":
			#records with "SOL" in columns 14-16 are left out
			if hb[i][14:17] != "SOL":
				distance = float(hb[i][29:35])
				angle_ac = float(hb[i][37:43])
				angle_do = float(hb[i][45:51])
				donor = hb[i][10:13]
				acceptor = hb[i][24:27]
				dist_bin = int(binning(min_dist, step_dist, distance))
				angle_bin_ac = int(binning(min_angle_ac, step_angle_ac, angle_ac))
				angle_bin_do = int(binning(min_angle_do, step_angle_do, angle_do))

				#checking for upper-limit value
				#a value exactly on the upper limit belongs to the last bin, not to one past it
				if distance == max_dist:
					dist_bin = n_dist_bins - 1
				if angle_ac == max_angle_ac:
					angle_bin_ac = n_angle_bins - 1
				if angle_do == max_angle_do:
					angle_bin_do = n_angle_bins - 1

				#a negative index would silently wrap around to the far end of the
				#histogram, so refuse every out-of-range bin before anything is counted
				in_range = (0 <= dist_bin < n_dist_bins and
					0 <= angle_bin_ac < n_angle_bins and
					0 <= angle_bin_do < n_angle_bins)
				if not in_range:
					raise ValueError("hydrogen bond outside the histogram range in "+name+": "+hb[i])

				don_idx = dict_don[donor]
				acc_idx = dict_acc[acceptor]
				record = name[1:5] + " " + hb[i] +"\n"
				dist_tag = str(dist_bin)
				tag_do = dist_tag+"_"+str(angle_bin_do)+".txt"
				tag_ac = dist_tag+"_"+str(angle_bin_ac)+".txt"

				#add to list of Ds: distance vs donor angle
				_record_bond(D, angle_bin_do, dist_bin, don_idx, "./hb_sta/don/"+donor+"/"+tag_do, record)
				#add to list of Ds: distance vs acceptor angle
				_record_bond(Da, angle_bin_ac, dist_bin, don_idx, "./hb_sta/donA/"+donor+"/"+tag_ac, record)
				#add to list of As: distance vs acceptor angle
				_record_bond(A, angle_bin_ac, dist_bin, acc_idx, "./hb_sta/acc/"+acceptor+"/"+tag_ac, record)
				#add to list of As: distance vs donor angle
				_record_bond(Ad, angle_bin_do, dist_bin, acc_idx, "./hb_sta/accD/"+acceptor+"/"+tag_do, record)

			i=i+1

#result tables
def _write_tables(kind, atom, atom_idx, counts, angle_lo, angle_hi):
	"""Append the raw and the normalised table of one atom type to its two result files.

	kind     -- sub-folder of ./hb_sta ("don", "donA", "acc" or "accD")
	atom     -- atom name, used in the file names <atom>.txt and NORM_<atom>.txt
	atom_idx -- index of that atom along the last axis of `counts`
	counts   -- matrix addressed as counts[angle bin][distance bin][atom type]
	angle_lo, angle_hi -- angle range handed to vol() for the normalisation

	Each table has one header line with the distance intervals, then one row
	per angle bin (numbered from 1); every cell is followed by a tab.
	Normalised cells are count / vol(distance bin, angle bin), rounded to 2 decimals.
	"""
	n_dist = len(counts[0])
	header = "N" + "\t"
	for k in range(0, n_dist):
		header = header + str(2.0+k*0.1) + str("-") + str(2.0+(k+1)*0.1) + "\t"
	header = header + "\n"
	#both files are closed by the with-blocks, also when a write or vol() fails
	with open("./hb_sta/"+kind+"/"+atom+".txt","a") as res_file:
		with open("./hb_sta/"+kind+"/NORM_"+atom+".txt","a") as nres_file:
			res_file.write(header)
			nres_file.write(header)
			for a in range(0, len(counts)):
				row = [counts[a][k][atom_idx] for k in range(0, n_dist)]
				raw_cells = "".join([str(c) + "\t" for c in row])
				norm_cells = "".join([str(round(row[k]/vol(k, a, angle_lo, angle_hi),2)) + "\t" for k in range(0, n_dist)])
				res_file.write(str(a+1) + "\t" + raw_cells + "\n")
				nres_file.write(str(a+1) + "\t" + norm_cells + "\n")

#just donors:
#by donor angle
for donor in dict_don:
	_write_tables("don", donor, dict_don[donor], D, min_angle_do, max_angle_do)

#by acceptor angle
for donor in dict_don:
	_write_tables("donA", donor, dict_don[donor], Da, min_angle_ac, max_angle_ac)

#just acceptors
#by acceptor angle
for acceptor in dict_acc:
	_write_tables("acc", acceptor, dict_acc[acceptor], A, min_angle_ac, max_angle_ac)

#by donor angle
for acceptor in dict_acc:
	_write_tables("accD", acceptor, dict_acc[acceptor], Ad, min_angle_do, max_angle_do)

