#!/usr/bin/env python
from __future__ import print_function
import os
import sys
import re
import json
import shutil
import tempfile
import logging
from os.path import isfile
from urllib.request import urlopen
from shutil import copyfileobj
from tabulate import tabulate

# Configure root logging: WARNING and above, each record prefixed with a
# timestamp, the level name and the emitting file/line.
logging.basicConfig(level=logging.WARNING, format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', datefmt='%Y-%m-%d:%H:%M:%S',)

# Extend sys.path with this script's own directory and with every
# sub-directory of the sibling '../method' folder.  This must run before the
# project-local imports below so that those modules can be resolved.
script_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(script_dir )
methods_dir = os.path.abspath(os.path.join(script_dir, '../method'))

# Only directories are added; plain files inside the method folder are skipped.
for method_dir_name in os.listdir(methods_dir):
    method_base_dir = os.path.join(methods_dir, method_dir_name)
    if os.path.isdir(method_base_dir):
        sys.path.append(method_base_dir)

from mhcipredictor import MHCIPredictor
from processing import processing_predict
from netchop_predict import PredictNetChop
from immunogenicity_predictor import ImmunogenicityPredictor, immunogenicity_allele_dict
from split import split_parameters_file, transfer_fasta_to_peptide_file
from aggregation import aggregate_result_file
# TODO: determine which functions from the utils are more generally applicable than
#       just for this standalone and pull out into a 'common' utils library.
from util import InputError, UnexpectedInputError, PredictorError, PeptideSequenceInput, get_species, InputData, get_peptides, MethodSet, get_mhc_list


from optparse import OptionParser
from functools import reduce

from collections import namedtuple, OrderedDict
# Immutable bundle of the options passed to PredictNetChop.predict() when a
# netchop / netctl / netctlpan predictor is run from a JSON input file.
NetchopOptions = namedtuple('NetchopOptions',
    ["method", "allele", "peptide_file", "fasta_file", "peptide_length_range", "network_method", "threshold", "cleavage_weight", "tap_weight", "supertype", "epitope_threshold", "plot_name", "noplot" ],
    # namedtuple's `defaults=` keyword is not used here; the defaults are
    # installed on __new__ right below instead.
)
# Every field defaults to None, so callers only need to supply the options
# they actually have.
NetchopOptions.__new__.__defaults__ = (None,) * len(NetchopOptions._fields)

import string
import random
from allele_validator import Allele_Validator
from sequences import Proteins

# from nxg-tools package
from nxg_common.nxg_common import save_file_from_URI

def generate_random_str(length):
    """Return a random alphanumeric string of exactly ``length`` characters.

    Characters are drawn *with replacement* from the ASCII digits and letters,
    so any non-negative length is supported.  (Sampling without replacement
    would fail for lengths above the 62 available characters.)

    The ``random`` module is not cryptographically secure; do not use the
    result as a secret or token.

    Raises:
        ValueError: if ``length`` is negative.
    """
    if length < 0:
        raise ValueError('length must be non-negative, got %r' % (length,))
    alphabet = string.digits + string.ascii_letters
    return ''.join(random.choices(alphabet, k=length))


def eprint(*args, **kwargs):
    """Print to standard error; accepts the same arguments as ``print``."""
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)


def read_peptides(fname):
    """Read a peptide file and return its non-blank lines, whitespace-stripped.

    Args:
        fname: path of a text file with one peptide per line.

    Returns:
        List of peptide strings in file order; blank lines are dropped.
    """
    with open(fname, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        return [peptide for peptide in stripped_lines if peptide]

def group_peptides_by_length(peptide_list):
    """Partition peptides into groups that share the same length.

    The list is scanned once.  Groups are returned in ascending order of
    peptide length (a deterministic order), and within a group the peptides
    keep their original relative order.

    Args:
        peptide_list: iterable of peptide strings.

    Returns:
        List of lists, one inner list per distinct length; ``[]`` for empty
        input.
    """
    groups_by_length = {}
    for peptide in peptide_list:
        groups_by_length.setdefault(len(peptide), []).append(peptide)
    return [groups_by_length[length] for length in sorted(groups_by_length)]

class Prediction:
    """Command-line front end of the standalone prediction tool.

    ``main()`` parses ``sys.argv`` and dispatches to one of the supported
    actions: print help / method versions, aggregate results, run a batch of
    JSON inputs, split a JSON parameter file, or run a single prediction.
    """

    def __init__(self):
        # Version string reported by the option parser's ``--version``.
        self.version = '0.1-beta'
        # Initialised empty; not used by any method of this class.
        self.row_data = []

    @staticmethod
    def print_method_versions():
        """Print a table of the bundled prediction methods and their versions."""
        data = [
            ['binding', 'consensus', '2.18'],
            ['binding', 'ann', '4.0'],
            ['binding', 'netmhcpan_ba', '4.1'],
            ['binding', 'netmhcpan_el', '4.1'],
            ['binding', 'comblib_sidney2008', '1.0'],
            ['binding', 'smm', '1.0'],
            ['binding', 'smmpmbec', '1.0'],
            ['binding', 'mhcnp', '2.0.3'],
            ['binding', 'mhcflurry', '2.1.1'],
            ['processing', 'basic_processing', '1.0'],
            ['processing', 'netchop', '3.1'],
            ['processing', 'netctl', '1.1'],
            ['processing', 'netctlpan', '1.1'],
            ['immunogenicity', 'immunogenicity', '1.0'],
        ]

        headers = ['Tool Group', 'Method', 'Version']

        # Using tabulate to neatly format and align
        print(tabulate(data, headers=headers, tablefmt='pipe'))

    @staticmethod
    def commandline_help():
        """Print the contents of ``example_commands.txt`` next to this script."""
        help_path = os.path.join(script_dir, 'example_commands.txt')
        # The context manager closes the file even if reading fails.
        with open(help_path, 'r') as help_file:
            print(help_file.read())

    def _build_option_parser(self):
        """Create the OptionParser describing every supported command-line option."""
        usage = (
            "usage: %prog method allele or [options] arg length\n---\n"
            "Following are the available choices - \n   "
            "method: ann, comblib_sidney2008, consensus, IEDB_recommended, netmhcpan_ba, netmhcpan_el, smm, smmpmbec\n   "
            "allele: an allele name\n   "
            "length: a length"
        )

        #TODO: Add option to run in 'internal mode' that will allow
        #      skipping certain validations, e.g.:
        #      checking if peptides are valid, method/length/allele combinations, etc.

        # The built-in help option is disabled because a custom -h/--help
        # (printing the example commands) is registered below.
        parser = OptionParser(usage=usage, version="%prog {}".format(self.version), add_help_option=False)

        parser.add_option("-v", "--versions",
                          action="store_true",
                          dest="version_flag",
                          default=False,
                          help="print specific methods and their versions.")

        parser.add_option("-h", "--help",
                          action="store_true",
                          dest="help",
                          default=False,
                          help="print available commands.")

        parser.add_option("--split",
                          action="store_true",
                          dest="split_parameters_flag",
                          default=False,
                          help="flag to indicate the action we want to take with the standalone: split parameters into JSON files")

        parser.add_option("--split-dir",
                          dest="split_parameters_dir",
                          default='',
                          help="the diretory for the JSON files that input parameters splitted into")

        parser.add_option("--split-inputs-dir",
                          dest="split_inputs_dir",
                          default=None,
                          help="the diretory for the sequence and peptide files that input sequences splitted into")

        parser.add_option("--input-ic50",
                          dest="input_ic50_file",
                          default='',
                          help="the file from upstream binding prediction that provide ic50 values")

        parser.add_option("--aggregate",
                          action="store_true",
                          dest="aggregate_parameters_flag",
                          default=False,
                          help="flag to indicate the action to aggregate the results")

        parser.add_option("--job-desc-file",
                          dest="job_desc_file",
                          default='',
                          help="the file path for the job description")

        parser.add_option("--aggregate-input-dir",
                          dest="aggregate_input_dir",
                          default='',
                          help="the diretory for the JSON files which have input parameters")

        parser.add_option("--aggregate-result-dir",
                          dest="aggregate_result_dir",
                          default='',
                          help="the diretory for the JSON files contains results need to be aggregated as well as the place we place the final result file")

        parser.add_option("--keep-empty-row",
                          action="store_true",
                          dest="keep_empty_row",
                          default=False,
                          help="flag to indicate keeping empty rows for results aggregation")

        parser.add_option("--has-consensus",
                          action="store_true",
                          dest="has_consensus",
                          default=False,
                          help="flag to indicate has_consensus method")

        parser.add_option("--batch",
                          action="store_true",
                          dest="batch_flag",
                          default=False,
                          help="flag to predict for all json file in input dir")

        parser.add_option("--input-dir", "-i", dest="input_dir",
                          help="input dir for JSON files.", metavar="INPUT_DIR")

        parser.add_option("--run-predictor",
                          action="store_true",
                          dest="predictor_flag",
                          default=True,
                          help="flag to indicate the action we want to take with the standalone: run predictor with atom unit")

        parser.add_option("--assume-valid",
                          action="store_true",
                          dest="assume_valid_flag",
                          default=False,
                          help="flag to indicate skiping validation")

        parser.add_option("-p", dest="filename_peptide",
                          help="FILE containing a list of peptide sequence.", metavar="PEPTIDE")

        parser.add_option("-u", dest="download_fasta_url",
                          help="download link for sequence.", metavar="FASTA_URL")

        parser.add_option("--method", dest="method",
                          help="prediction method.", metavar="METHOD")
        parser.add_option("--allele", dest="allele",
                          help="alleles", metavar="ALLELE")
        parser.add_option("--length", dest="length", default='9',
                          help="peptide length", metavar="PEPTIDE_LENGTH")

        parser.add_option("-j", dest="json_filename",
                          help="FILE containing all parameters.", metavar="JSON_FILE")

        parser.add_option("--output-prefix", "-o", dest="output_prefix",
                          help="prediction result output path and prefix.", metavar="OUTPUT_PREFIX")

        parser.add_option("--output-format", "-f", dest="output_format", default="tsv",
                          help="prediction result output format.", metavar="OUTPUT_FORMAT")

        return parser

    def main(self):
        """Parse the command line and run the requested action.

        Actions are checked in a fixed priority order (help, versions,
        aggregate, batch, split, single prediction); the first match runs and
        the process exits with status 0.  Any ``Exception`` raised along the
        way is printed to stderr and turned into exit status 1.
        ``SystemExit`` is not an ``Exception`` subclass, so the explicit
        ``sys.exit`` calls below pass through the handler untouched.
        """
        try:
            parser = self._build_option_parser()
            (options, args) = parser.parse_args()

            if options.help:
                self.commandline_help()
                sys.exit(0)

            if options.version_flag:
                self.print_method_versions()
                sys.exit(0)

            if options.aggregate_parameters_flag:
                aggregate_result_file(options.job_desc_file, options.aggregate_input_dir, options.aggregate_result_dir, options.has_consensus, options.keep_empty_row)
                sys.exit(0)

            if options.batch_flag:
                run_batch_predictions(options, args)
                sys.exit(0)

            # Everything below needs some kind of input; show help otherwise.
            if not (options.json_filename or options.filename_peptide or options.download_fasta_url):
                self.commandline_help()
                sys.exit(0)

            if options.split_parameters_flag:
                split_parameters_file(options.json_filename, options.split_parameters_dir, options.split_inputs_dir, options.assume_valid_flag, options.keep_empty_row)
                sys.exit(0)

            # When stdin is piped/redirected, its first line is treated as one
            # more positional argument.
            if not sys.stdin.isatty():
                stdin = sys.stdin.readline().strip()
                args.append(stdin)

            # Drop empty positional arguments (e.g. an empty stdin line).
            args = list(filter(None, args))
            commandline_input_prediction(options, args)

        except Exception as e:
            eprint(str(e))
            sys.exit(1)

def run_batch_predictions(options, args):
    """Run a prediction for every file found under ``options.input_dir``.

    The input directory is walked recursively.  For each file,
    ``options.json_filename`` is pointed at that file and
    ``options.output_prefix`` is set to ``<output dir>/<file name up to the
    first dot>``, then ``commandline_input_prediction`` is invoked.  Note that
    ``options`` is mutated in place.

    Args:
        options: parsed command-line options; ``input_dir`` is the directory
            to scan and ``output_prefix`` is used as the output directory.
        args: positional arguments, forwarded unchanged.

    Raises:
        ValueError: if the input or output directory option is missing.
    """
    input_dir = options.input_dir
    output_dir = options.output_prefix
    if not input_dir:
        raise ValueError('Batch mode requires an input directory (--input-dir/-i).')
    if not output_dir:
        raise ValueError('Batch mode requires an output directory (--output-prefix/-o).')
    os.makedirs(output_dir, exist_ok=True)

    for root, _dirs, filenames in os.walk(input_dir):
        for filename in filenames:
            # Output name is everything before the first dot of the file name.
            file_name = filename.split('.')[0]
            print('run prediciton for input file: %s/%s' % (root, filename))
            # Join with the directory actually being walked so that files in
            # sub-directories resolve to their real path.
            options.json_filename = os.path.join(root, filename)
            options.output_prefix = os.path.join(output_dir, file_name)
            commandline_input_prediction(options, args)

def truncate_file(output_path):
    """Empty the file at ``output_path``, creating it if it does not exist."""
    # Append mode creates a missing file and never fails on an existing one.
    with open(output_path, 'a+') as handle:
        handle.truncate(0)

def print_result(result):
    """Print each row of ``result`` to stdout as one tab-separated line."""
    for cells in result:
        line = '\t'.join(str(cell) for cell in cells)
        print(line)

def save_tsv(result, output_path):
    """Write ``result`` to ``output_path`` as tab-separated text.

    Each row becomes one line with its cells converted via ``str``; the file
    always ends with a newline.  An existing file is overwritten.
    """
    lines = ['\t'.join(str(cell) for cell in cells) for cells in result]
    content = '\n'.join(lines)
    with open(output_path, 'w') as tsv_file:
        tsv_file.write(content)
        tsv_file.write('\n')

def save_json(result, output_path):
    """Serialise ``result`` as indented JSON to ``output_path``.

    The parent directory is created first when the path names one.
    """
    parent_dir = os.path.dirname(output_path)
    if parent_dir != '':
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w') as json_file:
        json.dump(result, json_file, indent=2)


# Column names treated as "core" columns for each result-table type, keyed by
# the table type name.
# NOTE(review): presumably these are the columns that identify a row when
# tables are merged (see the `core_columns` parameters below) - confirm.
core_columns_dict = {
    "residue_table": [
        "position",
        "amino_acid",
        "sequence_number"
    ],
    "peptide_table": [
        "sequence_number",
        "peptide",
        "allele",
        "start",
    ],
    "netmhcpan_allele_distance": [
        "input_allele",
    ]
}


# Recursive helper used to fold additional output into an existing dict.
def merge_dict_with_recursion_update(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1`` in place.

    For every key of ``dict2``:
      * if both sides hold a dict, the two are merged recursively;
      * otherwise the value from ``dict2`` replaces (or adds) the entry in
        ``dict1`` - including the case where ``dict1`` holds a non-dict value
        and ``dict2`` holds a dict.

    Values are assigned by reference, not copied.  Returns ``None``.
    """
    for key, value in dict2.items():
        existing = dict1.get(key)
        # Only recurse when there is a dict on both sides; recursing into a
        # scalar would fail on item assignment.
        if isinstance(value, dict) and isinstance(existing, dict):
            merge_dict_with_recursion_update(existing, value)
        else:
            dict1[key] = value
# Canonical ordering of result "type" values; a result's position in this list
# is used as its sort key when the collected results are sorted.
ResultTypeSortingOrder = ['peptide_table', 'residue_table', 'allele_distance_table', 'netmhcpan_allele_distance', 'processing_plots', 'processing.netchop plots', 'processing.netctl plots', 'processing.netctlpan plots', 'network_graph', 'input_sequence_table']




def update_result_data_dict_by_table_data(result_data_dict, table_columns, table_data, predictor_source, core_columns):
    '''
    Merge the rows of one result table into ``result_data_dict`` in place.

    ``result_data_dict`` maps a tuple of core-column values to a dict of the
    remaining (non-core) column values for that row.

    Args:
        result_data_dict: accumulated rows, modified in place.
        table_columns: column names of ``table_data``.
        table_data: rows, each indexable by the positions of ``table_columns``.
        predictor_source: when truthy, non-core column names are stored as
            ``predictor_source + '.' + column``.
        core_columns: names of the columns that make up the row key.

    Notes:
        1. It is assumed that no two rows share the same core column values;
           a validation for this could be added.
        2. "mismatch keys" mean that new columns need to be added.

    NOTE(review): ``find_match`` and ``generate_new_key_from_mismatch_key``
    are neither defined nor imported in the visible part of this file;
    calling this function as-is would raise NameError unless they are
    provided elsewhere - confirm.
    '''

    # Position of every core column inside table_columns, or -1 when the
    # table does not contain that core column.
    table_data_has_core_columns = [table_columns.index(col) if col in table_columns else -1 for col in core_columns]

    # Keys superseded by a more specific key; removed only after the loop.
    del_keys = set()

    # Template for brand-new rows: every column known from the first existing
    # row, filled with the '-' placeholder.
    if result_data_dict:
        no_match_empty_value = {k:'-' for k in next(iter(result_data_dict.values())).keys()}
    else:
        no_match_empty_value = {}
    for row in table_data:
        # Row key built from the core columns; '-' stands in for a core
        # column this table does not have.
        table_key = tuple([row[i] if i >= 0 else '-' for i in table_data_has_core_columns ])
        # presumably classifies table_key against the existing keys - see the
        # NOTE(review) in the docstring
        no_match, exact_match_keys, mismatch_keys = find_match(table_key, result_data_dict)
        keys = []
        if no_match:
            # Unknown row: start from the placeholder template.
            result_data_dict[table_key] = no_match_empty_value.copy()
            keys = [table_key,]
        elif exact_match_keys:
            keys.extend(exact_match_keys)
        if mismatch_keys:
            for mismatch_key in mismatch_keys:
                # Copy the existing values to a new key derived from both
                # keys and schedule the old key for deletion.
                v = result_data_dict.get(mismatch_key)
                del_keys.add(mismatch_key)
                new_key = generate_new_key_from_mismatch_key(mismatch_key, table_key)
                keys.append(new_key)
                result_data_dict[new_key] = v.copy()
        for key in keys:
            value = result_data_dict.get(key)
            # Store every non-core cell of this row under its (optionally
            # source-prefixed) column name.
            for i in range(len(table_columns)):
                if i not in table_data_has_core_columns:
                    if predictor_source:
                        row_key = predictor_source+'.'+table_columns[i]
                    else:
                        row_key = table_columns[i]
                    row_value = row[i]
                    value[row_key] = row_value

    # Drop the superseded keys now that all rows have been processed.
    for key in del_keys:
        result_data_dict.pop(key)


def update_result_data_dict_by_table_columns(result_data_dict, table_columns, predictor_source, core_columns):
    """Ensure every row in ``result_data_dict`` has an entry for each non-core column.

    Missing entries are filled with the ``'-'`` placeholder; existing values
    are left untouched.  Column names are prefixed with
    ``predictor_source + '.'`` when ``predictor_source`` is truthy.
    The dict is modified in place.
    """
    for row_values in result_data_dict.values():
        for column in table_columns:
            if column in core_columns:
                continue
            row_key = predictor_source + '.' + column if predictor_source else column
            row_values.setdefault(row_key, '-')


def read_json_file(file_path):
    """Load and return the JSON document stored at ``file_path``."""
    with open(file_path, 'r') as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)


def get_allele_distances_table_data(allele_distances_data):
    """Flatten a nested allele-distance mapping into table rows.

    Args:
        allele_distances_data: mapping of input allele to a mapping of closest
            allele to distance.

    Returns:
        List of ``[input_allele, closest_allele, distance]`` rows, in the
        iteration order of the nested mappings.
    """
    return [
        [input_allele, closest_allele, distance]
        for input_allele, closest in allele_distances_data.items()
        for closest_allele, distance in closest.items()
    ]

def validate_allele(input_allele, method):
    """Check whether any of the given alleles can be used with ``method``.

    Validation is best effort: when the allele validator does not know the
    method, or fails while checking the alleles, no warnings or errors are
    reported (many methods do not work, or do not need to work, with the
    allele validator).

    Args:
        input_allele: comma-separated allele names.
        method: prediction method name.

    Returns:
        ``(warnings, errors)`` - two lists of strings.  ``errors`` holds one
        message when none of the alleles is valid for the method.
        ``warnings`` is currently always empty: per-allele warnings for
        partially invalid input are deliberately not emitted here because
        they might duplicate warnings produced elsewhere.
    """
    allele_validator = Allele_Validator()
    warnings = []
    errors = []

    # If the method is not valid for the allele validator, skip validation.
    # `except Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        allele_validator.validate_method(method)
    except Exception:
        return warnings, errors

    try:
        allele_list = input_allele.split(',')
        valid_by_index = allele_validator.validate_alleles(allele_list, method)
    except Exception:
        # Validator could not handle this method/allele combination.
        return warnings, errors

    # valid_by_index holds one truthy/falsy flag per input allele.
    if not any(valid_by_index):
        errors.append(f"Allele(s) incompatible with {method}: {input_allele}. If the allele name contains an asterisk, try specifying it with quotes.")
    return warnings, errors

# Column metadata (name, display name, type, ordering and description) for the
# allele-distance table: the input allele, its closest allele and the distance
# between the two.
allele_distances_table_columns = [
    { "name": "input_allele", "display_name":"Input Allele", "type": "text", "source": "core", "sort_order": 5, "row_sort_priority": None, "default_order": None, "description": "the predicted allele", "hidden": False},
    { "name": "closest_allele", "display_name":"Closest Allele", "type": "text", "source": "core", "sort_order": 5, "row_sort_priority": None, "default_order": None, "description": " its nearest neighbor in the training set", "hidden": False},
    { "name": "allele_distances", "display_name":"Distance", "type": "int", "source": "core", "sort_order": 6, "row_sort_priority": None, "default_order": None, "description": " Alleles with lower distances to the training set will have more accurate predictions. A distance of 0 indicates a perfect match between alleles and values at or below 0.1 is considered acceptable for generating accurate predictions.", "hidden": False},
]

def _remove_temp_files(paths):
    """Best-effort removal of the temporary input files created for one run."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Already gone or not removable; cleanup must not mask the real outcome.
            pass


def _result_table_type(method):
    """Return the result-table type name used in JSON output for ``method``."""
    return 'residue_table' if method == 'netchop' else 'peptide_table'


def _as_peptide_file(fname, seq_file_type, lengths):
    """Return a peptide-file path for ``fname``, converting FASTA input if needed."""
    if seq_file_type == 'fasta':
        return transfer_fasta_to_peptide_file(fname, lengths)
    if seq_file_type == 'peptides':
        return fname
    raise ValueError('can not accept seq_file_type: %s' % seq_file_type)


def _predict_from_json_input(options, output_format, output_prefix, output_tsv, output_json,
                             additional_result_info, warnings, temp_files):
    """Run every predictor listed in the JSON parameter file and emit the results.

    ``output_format`` is already validated and lower-cased ('tsv' or 'json').
    Paths of temporary files created here are appended to ``temp_files`` so
    that the caller can remove them afterwards.
    """
    with open(options.json_filename, 'r') as r_file:
        input_data = json.load(r_file)

    # --- peptide length range -------------------------------------------
    peptide_length_range = input_data.get('peptide_length_range', None)
    if peptide_length_range:
        if peptide_length_range == "asis":
            # 0 disables the length filter for peptide lists below.
            maximum_length = minimum_length = 0
            lengths = []
        else:
            minimum_length, maximum_length = map(int, peptide_length_range)
            lengths = ','.join(map(str, range(minimum_length, maximum_length + 1)))
    else:
        minimum_length, maximum_length = 8, 15
        lengths = ''

    # --- locate / materialise the input sequences -----------------------
    if 'input_sequence_text_file_path' in input_data:
        fname = input_data['input_sequence_text_file_path']
        seq_file_type = 'fasta'
        # Direct indexing: the length range is mandatory for this input kind.
        peptide_length_range = input_data['peptide_length_range']
    elif 'input_sequence_fasta_uri' in input_data:
        fname = save_file_from_URI(input_data['input_sequence_fasta_uri'])
        seq_file_type = 'fasta'
        peptide_length_range = input_data['peptide_length_range']
    elif 'peptide_file_path' in input_data:
        fname = input_data['peptide_file_path']
        seq_file_type = 'peptides'
    elif 'input_sequence_text' in input_data:
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_sequence_file:
            fname = tmp_sequence_file.name
            temp_files.append(fname)
            seq_file_type = 'fasta'
            tmp_sequence_file.write(input_data['input_sequence_text'])
    else:
        # A missing 'peptide_list' is treated like an empty one so that the
        # user gets the explicit "no valid peptides" error below.
        peptide_list = input_data.get('peptide_list') or []
        seq_file_type = 'peptides'
        if not options.assume_valid_flag and maximum_length:
            valid_peptides = []
            for peptide in peptide_list:
                if len(peptide) > maximum_length or len(peptide) < minimum_length:
                    warnings.append('peptide "%s" length is out of valid range (%s)' % (peptide, '%d-%d' % (minimum_length, maximum_length)))
                else:
                    valid_peptides.append(peptide)
            peptide_list = valid_peptides
        if not peptide_list:
            raise ValueError('No valid peptides found in the input list. Please check the the input and peptide length.')
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_peptides_file:
            fname = tmp_peptides_file.name
            temp_files.append(fname)
            tmp_peptides_file.write('\n'.join(peptide_list))

    input_allele = input_data.get('alleles')
    additional_result_info['warnings'] = warnings
    additional_result_info["results"] = []

    predictors = input_data.get('predictors')
    if predictors is None:
        raise ValueError('No predictors found in the input JSON file.')

    for predictor in predictors:
        # add recommended as alias
        method = predictor.get('method', '').replace('recommended_epitope', 'netmhcpan_el').replace('recommended_binding', 'netmhcpan_ba')
        if method != "immunogenicity" or predictor.get('mask_choice', '') == 'by_allele':
            allele_warnings, allele_errors = validate_allele(input_allele, method)
            if allele_errors:
                raise ValueError('\n'.join(allele_errors))
            elif allele_warnings:
                # Add the allele warnings themselves (not the list to itself).
                additional_result_info.setdefault('warnings', []).extend(allele_warnings)
        predictor_type = predictor.get('type')
        # if not method or method==type: return type else: join
        predictor_source = '.'.join(filter(None, dict.fromkeys((predictor_type, method))))

        if predictor_type == 'processing' and method in ['netchop', 'netctl', 'netctlpan', 'basic_processing']:
            if method in ['netchop', 'netctl', 'netctlpan']:
                predict_netchop = PredictNetChop()
                netchop_options = dict(fasta_file=fname, allele=input_allele, peptide_length_range=peptide_length_range)
                netchop_options.update(predictor)
                netchop_options.pop("type")
                netchop_options["noplot"] = True
                result = predict_netchop.predict(NetchopOptions(**netchop_options), [])
                if isinstance(result, tuple) and len(result) == 2:
                    result, plots = result
                    # add predictor_type info as well instead of only method?
                    plots_result = {
                        "type": "processing_plots",
                        "method": predictor_source,
                        "plots_data": plots,
                    }
                    additional_result_info['results'].append(plots_result)
            elif method == 'basic_processing':  # it would be base_processing
                result = processing_predict(predictor, fasta_file=fname, alleles=input_allele, peptide_length_range=peptide_length_range)
        elif predictor_type == 'immunogenicity':
            peptide_fname = _as_peptide_file(fname, seq_file_type, lengths)
            predictor_immunogenicity = ImmunogenicityPredictor()
            result = predictor_immunogenicity.predict(input_allele=input_allele, lengths=lengths, fname=peptide_fname, seq_file_type='peptides', **predictor)
        elif predictor_type == 'binding' and method == 'mhcnp':
            from mhcnp_predicter import predict as mhcnp_predict
            if seq_file_type not in ['fasta', 'peptides']:
                raise ValueError('can not accept seq_file_type: %s' % seq_file_type)
            if not output_prefix:
                raise ValueError('Please use "-o" to specify output file prefix for method "%s"' % method)
            # mhcnp is given the output path itself; nothing further is
            # written or printed here and the run ends after this predictor.
            mhcnp_predict(input_allele=input_allele, lengths=lengths, fname=fname, seq_file_type=seq_file_type, output_path=output_json, **predictor)
            print('mhcnp prediction done.')
            return
        elif predictor_type == 'binding' and method == 'mhcflurry':
            from mhcflurry_predicter import predict as mhcflurry_predict
            # FASTA input is converted to a peptide file and that file is what
            # gets predicted.  seq_file_type/fname are left untouched so that
            # later predictors in this loop still see the original input.
            # NOTE(review): assumes mhcflurry_predict expects a peptide file - confirm.
            peptide_fname = _as_peptide_file(fname, seq_file_type, lengths)
            result = mhcflurry_predict(input_allele=input_allele, fname=peptide_fname)
        else:
            if method == 'consensus':
                eprint('Error: Method consensus must be run using the "splitting and aggregation" approach and cannot be executed directly with a single command. Please refer to the readme for more information.')
                return
            mhci_predictor = MHCIPredictor(method)
            peptide_fname = _as_peptide_file(fname, seq_file_type, lengths)
            result = mhci_predictor.predict(input_allele, lengths, peptide_fname, 'peptides')
            if isinstance(result, tuple) and len(result) == 2:
                result, distances = result
                # add predictor_type info as well instead of only method?
                distance_result = {
                    "type": "netmhcpan_allele_distance",
                    "table_columns": ["input_allele", "closest_allele", "allele_distances"],
                    "table_data": get_allele_distances_table_data(distances),
                }
                additional_result_info['results'].append(distance_result)

        # --- emit this predictor's result --------------------------------
        if output_format == 'json':
            result_dict = dict(method=predictor_source, type=_result_table_type(method), table_columns=result[0], table_data=result[1:])
            additional_result_info.setdefault('results', []).insert(0, result_dict)
        if output_prefix:
            try:
                if output_format == 'tsv':
                    truncate_file(output_tsv)
                    save_tsv(result, output_tsv)
                save_json(additional_result_info, output_json)
            except PermissionError:
                print(f"Warning: Permission denied when trying to read or create a output file for path and prefix '{output_prefix}'. Check file permissions.")
        elif output_format == 'tsv':
            print_result(result)
        else:
            print(json.dumps(additional_result_info, indent=2))

    # NOTE(review): this sort runs after the results were already written or
    # printed above, so it only affects the in-memory structure.
    additional_result_info['results'].sort(key=lambda result: ResultTypeSortingOrder.index(result['type']))

    if warnings and not options.assume_valid_flag:
        eprint('warnings:')
        eprint(*warnings, sep='\n')


def commandline_input_prediction(options, args):
    """Run a prediction described by the command-line options and emit the result.

    Three input modes are supported, checked in this order:
      * ``-j``: a JSON parameter file listing one or more predictors;
      * ``-p``: a peptide file, with method/allele/length given as options;
      * ``-u``: a FASTA download URL, with method/allele/length as options.

    Results are written to ``<output_prefix>.tsv`` / ``<output_prefix>.json``
    when an output prefix is given and printed to stdout otherwise.

    Args:
        options: parsed command-line options.
        args: positional arguments (currently unused).

    Raises:
        ValueError: for invalid alleles, missing input, empty peptide lists or
            methods that require JSON input.
    """
    # 1. read input params
    output_prefix = options.output_prefix
    output_format = options.output_format
    if output_format.lower() not in ['tsv', 'json']:
        eprint('The output format options are "tsv" or "json". Invalid format "%s" is given.' % output_format)
        return
    # From here on the format is known to be exactly 'tsv' or 'json'.
    output_format = output_format.lower()
    output_tsv = ''
    output_json = ''
    if output_prefix:
        output_tsv = output_prefix + '.tsv'
        output_json = output_prefix + '.json'
    additional_result_info = {}
    warnings = []
    additional_result_info['warnings'] = warnings

    if options.json_filename:
        temp_files = []
        try:
            _predict_from_json_input(options, output_format, output_prefix, output_tsv, output_json,
                                     additional_result_info, warnings, temp_files)
        finally:
            # Temporary input files are only needed while predictors run.
            _remove_temp_files(temp_files)
        return

    if options.filename_peptide:
        fname = options.filename_peptide
        seq_file_type = 'peptides'
        method = options.method
        input_allele = options.allele
        input_length = options.length
    elif options.download_fasta_url:
        input_sequence_text_file_path = save_file_from_URI(options.download_fasta_url)
        seq_file_type = 'peptides'
        method = options.method
        input_allele = options.allele
        input_length = options.length
        fname = transfer_fasta_to_peptide_file(input_sequence_text_file_path, input_length)
    else:
        raise ValueError('No input provided. Please specify the input with the -j, -p or -u option.')

    # 2 validation
    # these method only works with JSON input for time being
    if method in ["basic_processing", "netchop", "netctl", "netctlpan", "immunogenicity", "mhcnp", "mhcflurry"]:
        raise ValueError('The method %s requires JSON input, Please specify the path of the JSON input file with the -j option. For example: \n * python3 src/tcell_mhci.py  -j [input_json_file] -f json -o [output-prefix]' % method)

    # input validation
    if not options.assume_valid_flag:
        errors = input_validation(method, input_allele, input_length, fname, seq_file_type)
        if errors:
            eprint('validation error: %s' % errors)
            return
    allele_warnings, allele_errors = validate_allele(input_allele, method)
    if allele_errors:
        raise ValueError('\n'.join(allele_errors))
    elif allele_warnings:
        # Add the allele warnings themselves (not the list to itself).
        additional_result_info.setdefault('warnings', []).extend(allele_warnings)

    # 3. predict
    if method == 'consensus':
        eprint('Error: Method consensus must be run using the "splitting and aggregation" approach and cannot be executed directly with a single command. Please refer to the readme for more information.')
        return
    predictor = MHCIPredictor(method)
    result = predictor.predict(input_allele, input_length, fname, seq_file_type)

    # 4. output
    if isinstance(result, tuple) and len(result) == 2:
        result, distances = result
        additional_result_info['allele_distances'] = distances
    if output_prefix:
        try:
            if output_format == 'tsv':
                save_tsv(result, output_tsv)
            else:
                # The info document below is written to the same .json path, so
                # the result table is embedded in it instead of being written
                # separately and then overwritten.
                # NOTE(review): the entry mirrors the shape used for JSON-file
                # input; confirm this is the desired schema for this mode.
                rows = list(result)
                additional_result_info['results'] = [
                    dict(method=method, type=_result_table_type(method), table_columns=rows[0] if rows else [], table_data=rows[1:])
                ]
            save_json(additional_result_info, output_json)
        except PermissionError:
            print(f"Warning: Permission denied when trying to read or create a output file for path and prefix '{output_prefix}'. Check file permissions.")
    else:
        print_result(result)
    


def input_validation(method, input_allele, input_length, fname, seq_file_type='peptide'):
    """Validate the prediction inputs with the predictor for ``method``.

    Returns whatever the predictor's ``is_valid`` check returns, or an empty
    list when no underlying predictor is available for the method.
    """
    # TODO: determine which validations apply to all methods and which
    #       are specific.  E.g., allele, method, length validation is general
    #       and can be done before any method-specific validation using the
    #       allele-validator package
    from mhcipredictor import MHCIPredictor
    mhci_predictor = MHCIPredictor(method)
    if not mhci_predictor.predictor:
        return []
    return mhci_predictor.is_valid(input_allele, input_length, fname, seq_file_type)



# Script entry point: run the command-line interface when this file is
# executed directly (not when it is imported).
if __name__ == '__main__':
    Prediction().main()

def test_merge_dict_with_recursion_update():
    """Ad-hoc demo: merge two nested allele-distance dicts and print the result."""
    # TODO: remove this or move it to somewhere else

    def distances_for(allele):
        # One method entry holding a single allele mapped to itself.
        return {
            "binding.netmhcpan_el": {
                "allele_distances": {
                    allele: {allele: "0.000"},
                },
            },
        }

    merged = distances_for("HLA-A*02:01")
    incoming = distances_for("HLA-A*01:01")

    merge_dict_with_recursion_update(merged, incoming)

    print(merged)

