'''
Created on May 2, 2016

@author: jivan
'''
import os, math, re
from iedbtools_utilities.sequence_manipulation import split_sequence
import tempfile
from logging import getLogger
from allele_info import is_user_defined_allele, MHCIAlleleData
from subprocess import Popen, PIPE
logger = getLogger(__name__)

# File name of the PickPocket binary that this module invokes.
EXECUTABLE_NAME = 'PickPocket'
# Absolute path of the directory containing this module; the binary is looked up here.
EXECUTABLE_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
# Normalized full path of the binary, used as the first element of the command line.
EXECUTABLE_PATH = os.path.normpath(os.path.join(EXECUTABLE_DIRECTORY, EXECUTABLE_NAME))

def predict_single(sequence, allele_length_pair):
    '''Split one protein sequence into peptides and return their predicted scores.

    sequence           -- the protein sequence handed to split_sequence().
    allele_length_pair -- a two-item (allele, binding_length) pair; the length
                          is passed to split_sequence() and the whole pair is
                          forwarded to predict_peptide_list().

    Returns whatever predict_peptide_list() returns for the resulting peptides.
    '''
    # Unpack rather than index so a pair of the wrong size still fails here.
    _allele, peptide_length = allele_length_pair
    peptides = split_sequence(sequence, peptide_length)
    return predict_peptide_list(peptides, allele_length_pair)

def _user_mhc_fasta(allele):
    '''Build the FASTA text that is written to the user-MHC temp file.

    *allele* is split on ">"; every non-empty record becomes a ">name" header
    line followed by its sequence with all whitespace removed.  When several
    records are present only the last one is kept (unchanged from the
    previous behaviour).

    Raises ValueError when *allele* contains no record at all.
    '''
    usermhc = None
    for record in allele.split(">")[1:]:
        if not record:
            continue
        # partition() keeps the full header when a record has no newline;
        # the former find()/slice approach chopped its last character off.
        name, _, sequence_text = record.partition("\n")
        usermhc = '>' + name.rstrip() + '\n' + "".join(sequence_text.split())
    if usermhc is None:
        raise ValueError(
            'No sequence record found in user-defined allele: {!r}'.format(allele))
    return usermhc


def predict_peptide_list(peptide_list, allele_length_pair):
    '''Run the PickPocket executable on a list of peptides and return their scores.

    This routine can be called directly so that callers do not have to create
    an input file for each prediction themselves.

    peptide_list       -- iterable of peptide strings, written one per line to
                          a temporary input file.
    allele_length_pair -- two-item (allele, binding_length) pair.  When
                          is_user_defined_allele(allele) is true, *allele* is
                          treated as FASTA text and passed via '-hlaseq';
                          otherwise the name returned by strip_allele_name()
                          is passed via '-a'.

    Returns a tuple with one score per peptide, as produced by
    parse_pickpocket().

    Raises OSError when the executable cannot be started, ValueError when a
    user-defined allele contains no sequence record, and Exception when the
    number of parsed scores differs from the number of peptides.
    '''
    allele, binding_length = allele_length_pair
    user_defined_allele = is_user_defined_allele(allele)
    # Validate the user input before any temp file is created.
    usermhc = _user_mhc_fasta(allele) if user_defined_allele else None

    temp_paths = []
    try:
        # delete=False: the files must survive close() so the child process
        # can read them; they are removed in the finally clause below.
        # mode='w' accepts native str on both Python 2 and Python 3.
        infile = tempfile.NamedTemporaryFile(
            mode='w', prefix='pickpocket_', suffix='_input', delete=False)
        temp_paths.append(infile.name)
        with infile:
            for peptide in peptide_list:
                infile.write(peptide + "\n")

        if user_defined_allele:
            usermhcfile = tempfile.NamedTemporaryFile(
                mode='w', prefix='pickpocket_', suffix='_usermhc', delete=False)
            temp_paths.append(usermhcfile.name)
            with usermhcfile:
                usermhcfile.write(usermhc)
            allele_args = ['-hlaseq', usermhcfile.name]
        else:
            # Temporary fix: the executable needs the allele name rewritten.
            allele_args = ['-a', strip_allele_name(allele)]

        cmd = [EXECUTABLE_PATH] + allele_args + [
            '-l', str(binding_length), '-inptype', '1', '-p', infile.name
        ]
        try:
            msg = 'Calling PickPocket executable:\n{}'.format(' '.join(cmd))
            logger.info(msg)
            # universal_newlines=True yields text (not bytes) on Python 3 and
            # is harmless on Python 2, so the parser always receives str.
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
            pickpocket_output, pickpocket_stderr = p.communicate()
        except OSError:
            msg = 'Problem calling PickPocket executable:\n{}'.format(' '.join(cmd))
            logger.error(msg)
            raise

        scores = parse_pickpocket(pickpocket_output)
    finally:
        # Clean up on success and on failure alike; a file that cannot be
        # removed must not mask the real error.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                logger.warning('Could not remove temporary file %s', path)

    if (len(peptide_list) != len(scores)):
        msg = "len(peptide_list) != len(scores) -- {} != {}".format(len(peptide_list), len(scores))
        msg = msg + '\n' + pickpocket_output
        logger.error(msg)
        if pickpocket_stderr:
            logger.error('PickPocket stderr:\n%s', pickpocket_stderr)
        raise Exception(msg)

    return (tuple(scores))


def strip_allele_name(allele_name):
    """ | *brief*: Temporary hack that rewrites an allele name into the form passed
        |          to the pickpocket executable: the '*' is replaced by ':' for
        |          macaque and pig alleles and removed for every other species.
        | *author*: Dorjee
        | *created*: 2016-09-12

        The species is looked up through MHCIAlleleData.get_species_for_allele_name().

        TODO: A more permanent solution would be to create a column in the database for canonical allele name.
    """
    species = MHCIAlleleData().get_species_for_allele_name(allele_name=allele_name)
    star_replacement = ':' if species in ('macaque', 'pig') else ''
    return allele_name.replace('*', star_replacement)

def parse_pickpocket(content):
    '''Extract the converted prediction values from PickPocket's text output.

    A line is treated as a result row when it contains 'PEPLIST' and its first
    whitespace-separated field consists of digits only.  The fifth field of
    such a row is read as the prediction score and converted with
    50000 ** (1 - score).

    content -- the executable's output as a single str.

    Returns a list of floats, one per result row, in order of appearance.

    Raises IndexError when a result row has fewer than five fields, ValueError
    when the score field is not a number, and OverflowError when the converted
    value does not fit in a float.
    '''
    scores = []
    for line in content.split('\n'):
        if 'PEPLIST' not in line:
            continue
        fields = line.split()
        if not fields[0].isdigit():
            continue
        # Rows for user-defined alleles ("USER_DEF" in the second field) and
        # rows for named alleles used to go through two separate branches
        # that applied this same formula; one conversion now serves both.
        binding_affinity = float(fields[4])
        scores.append(math.pow(50000, (1 - binding_affinity)))
    return scores

def transform_score(x):
    '''Convert a prediction score into the corresponding IC50 value.

    The score is defined as  score = 1 - log50k(aff),  hence
    aff = 50000 ** (1 - score).  The IC50 itself is returned, not its
    logarithm.

    x -- the score as a number or a numeric string.

    Raises ValueError when *x* cannot be converted to float and OverflowError
    when the result is too large for a float (strongly negative scores).
    '''
    ba = float(x)
    try:
        return math.pow(50000, (1 - ba))
    except OverflowError:
        # The old bare "except" printed a message and then failed with an
        # unrelated UnboundLocalError; surface the real problem instead.
        raise OverflowError(
            'Cannot convert score {!r} to IC50: result out of range'.format(x))
