#!/usr/bin/python3
import os

class count_ec:
	"""Group the per-row counts of a tab-separated table by EC-number count.

	Each data row of the table is bucketed by how many comma-separated
	entries appear inside the first ``[...]`` pair of its ninth column, and
	the value of its second column (treated as a protein count) is collected
	into that bucket.
	"""

	# Table read by printls() when no explicit path is supplied.
	DEFAULT_TABLE = "/array1/www/dbCAN_sub/data/combined23_6_22.txt"
	# Zero-based positions of the tab-separated columns that are used.
	COUNT_COLUMN = 1
	EC_COLUMN = 8

	def __init__(self, lsdir):
		"""Store the directory listing of *lsdir* in ``self.lsdir``.

		The listing is kept on the instance only; printls() does not read it.
		Propagates any OSError raised by os.listdir.
		"""
		self.lsdir = os.listdir(lsdir)

	def _group_counts(self, table_path):
		"""Return ``{str(number_of_ECs): [count_string, ...]}`` for *table_path*.

		Keys appear in the order in which each EC count is first seen and the
		count strings keep their file order.
		"""
		grouped = {}
		with open(table_path, "r") as table:
			# Skip the header row; the default keeps an empty file from
			# raising StopIteration.
			next(table, None)
			for line in table:
				# Blank lines (e.g. a trailing newline) carry no data.
				if not line.strip():
					continue
				fields = line.split("\t")
				protein_count = fields[self.COUNT_COLUMN]
				# Text between the first "[" and the next "]" of the EC column.
				ec_field = fields[self.EC_COLUMN].split("[")[1].split("]")[0]
				# A field without a "." holds no EC number and counts as zero.
				ec_total = len(ec_field.split(",")) if "." in ec_field else 0
				grouped.setdefault(str(ec_total), []).append(protein_count)
		return grouped

	def printls(self, table_path=None):
		"""Print one summary line per EC-number count found in the table.

		Each line holds the EC count, the integer sum of the collected
		counts, and the list of the collected count strings.

		Args:
			table_path: Path of the tab-separated table to read. Defaults to
				DEFAULT_TABLE.

		Raises:
			IndexError: A non-blank row has too few columns or no "[" in its
				EC column.
			ValueError: A collected count is not an integer literal.
			OSError: The table cannot be opened.
		"""
		if table_path is None:
			table_path = self.DEFAULT_TABLE
		for ec_total, counts in self._group_counts(table_path).items():
			total = sum(int(value) for value in counts)
			print(ec_total, str(total), counts)

if __name__ == "__main__":
	# Script entry point: build the counter for the CAZy directory and
	# print its per-EC-count summary.
	counter = count_ec("/array1/www/dbCAN_sub/data/CAZy_with_ECs")
	counter.printls()