from pkg_resources import resource_filename  # @UnresolvedImport
import pickle
import math
# Name of the package that ships the pickled predictor data as a resource.
package_name = 'mhcii_comblib_predictor'
# File name of the pickled data resource inside that package.
pickle_filename = 'comblib_predictor_data_pre_2016-02-12.p'
# Filesystem path of the pickle, resolved through pkg_resources at import time.
pickle_filepath = resource_filename(package_name, pickle_filename)

class ComblibPredictorDataManager(object):
    '''
        Holds the unpickled predictor data and serves per-allele lookups from it.
    '''

    def __init__(self):
        # The whole pickle is read once, in binary mode, and kept on the instance.
        with open(pickle_filepath, 'rb') as pickle_handle:
            loaded_data = pickle.load(pickle_handle)
        self.data_dict = loaded_data

    def get_analytical_data(self, allele):
        '''
            Looks up the prediction data stored for the given allele.

            The stored entry is returned as-is (no copy is made). An entry is a
            dictionary shaped like this example: {
                'slope': 0.25736498632395144,
                'geomean': '91.2291349683',
                'matrix': {
                    'A': (0.173263904639, -0.000773223694479, 0.123682147837, 0.078531006245, 0.0666184692524, 0.48216102184, 0.440688734309, 0.657949681511, 0.348140835329),
                    'C': (-0.106902341808, -0.160326604133, -0.216558539768, -0.138981119888, -0.225396978106, -0.232090473612, -0.321895454638, -0.20032827038, -0.267984681693),
                    'E': (-0.262675559425, -0.364923191222, -0.326049771465, -0.270143537969, -0.0179736644164, -0.096454320561, -0.366072477171, -0.845134726862, 0.0985802448712),
                    ...
                    'V': (-0.024391359001, 0.261184278249, 0.297765692739, 0.206878943338, 0.196349343831, 0.0886540141185, 0.094363378767, 0.20092524669, 0.505352079031),
                    'Y': (0.313262989885, 0.417783976496, 0.0223091880915, 0.29430794015, 0.0963186554405, -0.228108811974, 0.105290674298, 0.29405089936, -0.17599979356)
                },
                'intercept': 0.17647588770735254
            }
            Each 'matrix' key is an amino acid; its tuple holds one binding value
            per position.

            Raises KeyError when data_dict is a plain dict and has no entry for
            the allele.
        '''
        per_allele_data = self.data_dict
        return per_allele_data[allele]

# Module-level instance created at import time; constructing it reads the pickle
# file from disk. single_prediction_comblib looks its allele data up through it.
data_manager = ComblibPredictorDataManager()

def single_prediction_comblib(sequence_list, allele_length_pairs, coreseq_len=9):
    '''
        Scores every binding_length-residue window of every sequence for each allele.

        For each window, every coreseq_len-residue core inside it is scored by
        summing the allele's per-position matrix values, then converted with:
            EIC50 = 10 ** (-matrix_sum) * geomean
            PIC   = 10 ** ((log10(EIC50) - intercept) / slope)
        PIC is clamped to the range [0.01, 1000000]. The window keeps the core
        with the lowest PIC; on a tie the later core offset wins.

        Parameters:
            sequence_list: iterable of sequences (strings of amino acid letters).
            allele_length_pairs: iterable of (allele, binding_length) pairs. The
                allele is looked up through data_manager.get_analytical_data.
            coreseq_len: number of residues in a core (default 9). The matrix
                tuples must have at least this many positions.

        Returns:
            A dictionary mapping (sequence, allele, binding_length) to a tuple of
            (core, score) pairs, one per window in sequence order. A sequence
            shorter than binding_length gives an empty tuple. When
            binding_length is smaller than coreseq_len no core fits, and each
            window reports ("", 100000000).

        Raises:
            Exception: when a residue of the sequence has no entry in the
                allele's matrix.
    '''
    analysis_results = {}

    for sequence in sequence_list:
        for allele, binding_length in allele_length_pairs:
            allele_prediction_data = data_manager.get_analytical_data(allele)
            matrix = allele_prediction_data['matrix']
            intercept = allele_prediction_data['intercept']
            slope = allele_prediction_data['slope']
            # float() is applied because 'geomean' may be stored as a string;
            # converted once here instead of once per core.
            geomean = float(allele_prediction_data['geomean'])

            # Number of window start positions in the sequence, e.g. for a
            # 15-residue window: len(sequence) - 14.
            window_count = len(sequence) - (binding_length - 1)
            # Number of core start offsets inside one window, e.g. 15 - (9 - 1) = 7.
            offset_count = binding_length - (coreseq_len - 1)

            scores = []
            for nterm in range(window_count):
                # Sentinel above the upper clamp, so the first core always replaces it.
                score = 100000000
                core = ""
                for i in range(offset_count):
                    start = nterm + i
                    # Slice with coreseq_len (not a literal 9) so the reported
                    # core is exactly the stretch of residues that gets scored.
                    candidate = sequence[start:start + coreseq_len]
                    temp_score = 0
                    for pos, amino_acid in enumerate(candidate):
                        try:
                            temp_score += matrix[amino_acid][pos]
                        except KeyError:
                            # Only a missing matrix key means an invalid residue;
                            # other errors propagate unchanged.
                            raise Exception("""Invalid character '%c' in sequence '%s'.""" % (amino_acid, sequence))
                    temp_tscore = 0 - temp_score
                    EIC50 = math.pow(10, temp_tscore) * geomean
                    logEIC50 = math.log10(EIC50)
                    logPIC = (logEIC50 - intercept) / slope
                    PIC = math.pow(10, logPIC)
                    if PIC >= 1000000:
                        PIC = 1000000
                    if PIC <= 0.01:
                        PIC = 0.01
                    # "<=" keeps the later offset when two cores tie.
                    if PIC <= score:
                        score = PIC
                        core = candidate
                scores.append((core, score))
            analysis_results[(sequence, allele, binding_length)] = tuple(scores)
    return analysis_results
