#! /usr/bin/python
import logging
import os
import subprocess
import sys

from allele_validator import Allele_Validator
from mhcflurry_2_0_percentile_data import score_distributions as mhcflurry_score_distributions
from PercentilesCalculators import MHCIPercentilesCalculator

# Docker image handed to `docker run`; the MHCFlurry_DOCKER_IMG environment
# variable overrides the default registry path.
MHCFlurry_DOCKER_IMG = os.getenv("MHCFlurry_DOCKER_IMG", "harbor.lji.org/iedb-public/mhcflurry")

def read_peptides(fname):
    ''' Reads peptides from a text file, one per line.

    Leading/trailing whitespace is stripped from every line, and lines that
    are empty after stripping are skipped.

    Returns a list of the remaining strings in file order.
    '''
    collected = []
    with open(fname, 'r') as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                collected.append(cleaned)
    return collected

def get_percentile_for_score(score, allele, peptide, method, score_distributions=None):
    ''' Returns the percentile score for a single raw score.

    score: raw prediction score to convert.
    allele: allele name; for the 'ann' and 'comblib_sidney2008' methods every
        "*" is removed from it before the lookup.
    peptide: peptide the score belongs to; only its length is used.
    method: prediction method name forwarded to the percentile calculator.
    score_distributions: forwarded unchanged to MHCIPercentilesCalculator.

    Any exception raised by the calculator (e.g. ValueError) propagates to
    the caller unchanged.
    '''
    if method in ('ann', 'comblib_sidney2008'):
        allele = allele.replace("*","")

    calculator = MHCIPercentilesCalculator(score_distributions)
    peptide_length = len(peptide)

    # The calculator works on a list of scores; wrap the single score and
    # unwrap the single result.
    percentiles = calculator.get_percentile_scores(
        [score], method, allele, peptide_length)
    return percentiles[0]

def _run_mhcflurry_container(peptides, input_allele):
    ''' Runs the MHCflurry docker image and returns what it wrote to stdout.

    Returns an empty string when docker itself cannot be started.
    '''
    # Equivalent shell form:
    #   docker run -e MHCFLURRY_ARGS="--peptides SIINFEKL DENDREKLLL --alleles HLA-A*02:01 HLA-A*03:01 --affinity-only" <image>
    mhcflurry_args = f"--peptides {' '.join(peptides)}  --alleles {input_allele.replace(',', ' ')} --affinity-only"
    # An argv list (no shell) is used so that peptide-file contents and allele
    # names can never be interpreted as shell syntax.
    cmd = ["docker", "run", "-e", f"MHCFLURRY_ARGS={mhcflurry_args}", MHCFlurry_DOCKER_IMG]
    try:
        # universal_newlines=True is the pre-3.7 spelling of text=True.
        completed = subprocess.run(cmd, stdout=subprocess.PIPE, universal_newlines=True)
    except OSError as err:
        # e.g. the docker executable is missing; keep the best-effort
        # behaviour of producing no rows instead of crashing.
        logging.error("could not start docker for mhcflurry: %s", err)
        return ''
    if completed.returncode != 0:
        logging.warning("mhcflurry docker command exited with status %s", completed.returncode)
    return completed.stdout


def predict(**kwargs):
    ''' Runs MHCflurry in docker and returns a table of affinities and percentiles.

    Keyword arguments:
        input_allele: comma-separated allele names.
        fname: path to a text file with one peptide per line.

    Returns a list of tuples. The first tuple is the header
    ('allele', 'peptide', 'ic50', 'percentile'); the remaining tuples are
    (IEDB allele label, peptide, affinity rounded to 2 decimals, percentile
    rounded to 2 decimals), sorted by ascending affinity. If the container
    output does not contain the expected CSV header, only the header tuple
    is returned.
    '''
    input_allele = kwargs.get('input_allele')
    peptides_file_path = os.path.abspath(kwargs.get('fname'))
    peptides = read_peptides(peptides_file_path)

    result = _run_mhcflurry_container(peptides, input_allele)

    score_unit = 'ic50'
    method = 'mhcflurry'
    header = ('allele','peptide', score_unit,'percentile')
    table_rows = []
    validator = Allele_Validator()

    # The CSV data follows the header line, whose last column name is the
    # marker below; anything before it is ignored.
    sections = result.split('mhcflurry_affinity_percentile\n')
    if len(sections) == 2:
        for row in sections[1].split('\n'):
            row = row.strip()
            if not row:
                continue
            # The percentile reported by mhcflurry is discarded; it is
            # recomputed from mhcflurry_score_distributions below.
            allele, peptide, mhcflurry_affinity, _reported_percentile = row.split(',')
            score = float(mhcflurry_affinity)
            iedb_label = validator.convert_synonym_to_iedblabel(allele)
            # The percentile is looked up with the unrounded score.
            percentile = get_percentile_for_score(score, iedb_label, peptide, method, mhcflurry_score_distributions)
            table_rows.append((iedb_label, peptide, round(score, 2), round(percentile, 2)))
    else:
        logging.warning("mhcflurry output did not contain the expected header; returning no rows")

    table_rows.sort(key=lambda tup: tup[2])
    table_rows.insert(0, header)
    return table_rows
    
if __name__ == '__main__':
    # Placeholder: nothing happens when this module is executed directly.
    # Add a manual test invocation here if one is needed.
    pass

