import numpy as np
import pandas as pd
from pathlib import Path


class AlleleValidator:

    def __init__(self):
        '''-----------------------------------------------------------------------------------
        [Description]
        Loads every lookup table the validator works with. The feather tables are read
        from the 'pickles' folder and the method table (Excel) from the 'data' folder;
        both folders are resolved next to this source file.

        [Attributes set]
        self.data (dict):   'molecule_data', 'mro_data' and 'autocomp_data' DataFrames.
        self.tools_method_info (pd.DataFrame):  Result of '_get_tools_method_info'.
        -----------------------------------------------------------------------------------'''
        # Directory that contains this module; every data path hangs off it.
        base_dir = str(Path(__file__).resolve().parents[0])
        data_dir = f"{base_dir}/data"
        pickles_dir = f"{base_dir}/pickles"

        # Key used across the class -> feather file holding that table.
        feather_files = {
            "molecule_data": 'molecule_info.feather',
            "mro_data": 'tools_mapping_info.feather',
            "autocomp_data": 'autocomp_data_info.feather',
        }
        self.data = {
            key: pd.read_feather(f"{pickles_dir}/{filename}")
            for key, filename in feather_files.items()
        }

        self.tools_method_info = self._get_tools_method_info(f"{data_dir}/method-table.xlsx")

        # The synonym search calls str.split on every 'Synonyms' cell, so missing
        # values are replaced with empty strings up front.
        molecule_df = self.data["molecule_data"]
        molecule_df["Synonyms"] = molecule_df["Synonyms"].fillna("")
        
    
    # ==========================================================
    # Helper Functions
    # ==========================================================
    def is_single_data(self, allele):
        if isinstance(allele, str):
            return True
        elif isinstance(allele, list):
            return False
        else:
            raise TypeError(
                "Please provide string type for single allele, and list of strings for multiple alleles.")
        
    def _get_tools_method_info(self, data_path):
        '''-----------------------------------------------------------------------------------
        [Description]
        Reads every sheet of the method table (Excel) and stacks them into one DataFrame.
        Each row gets an extra 'tool' column holding the lower-cased name of the sheet
        it came from:
        ex) | method | version | default_version | tool |
            |--------+---------+-----------------+------|
            |ann     |4.0      |True             |mhci  |
            |ann     |4.0      |True             |processing  |
            ...

        [Parameters]
        data_path (str):    Path to the method data file (method-table.xlsx)

        [Return Value]
        pd.DataFrame - All sheets concatenated (fresh 0..n-1 index) with the additional
                       'tool' column. An empty DataFrame when the workbook has no sheets.
        -----------------------------------------------------------------------------------'''
        # sheet_name=None makes pandas return {sheet name: DataFrame} for all sheets.
        sheets = pd.read_excel(data_path, sheet_name=None, engine='openpyxl')

        # Tag each sheet with its (lower-cased) name before stacking.
        tagged_sheets = [
            sheet.assign(tool=sheet_name.lower())
            for sheet_name, sheet in sheets.items()
        ]

        # pd.concat refuses an empty list, so keep the "no sheets" result explicit.
        if not tagged_sheets:
            return pd.DataFrame()

        # One concat for all sheets instead of re-copying the growing frame per sheet.
        return pd.concat(tagged_sheets, ignore_index=True)
    
    def _get_indices(self, array, val, multi_index=False):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given an array, it will use numpy to seach the value within the array, and return
        the index of where the exact match of the value was found.

        [Parameters]
        array (np.array):   Numpy array.
        val (str):  Value to search in the array.
        multi_index (bool): Tells whether to return single index or all indices.

        [Return Value]
        int (default) - index of where the value was found in the array.
        [int] - all the indices of where the value was found in the array.
        None - When value can't be found in the array.
        -----------------------------------------------------------------------------------'''
        # np.where will output e.g. (array([26920, 26921]),)
        idx_array = np.where(array == val)[0]
        if 0 == len(idx_array): return None
        if multi_index: return idx_array

        return idx_array[0]
    
    def _get_indices_substr(self, array, val, multi_index=False):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given an array, it will use numpy to seach the value within the array, and return
        the index of where the value was found as a substring.

        [Parameters]
        array (np.array):   Numpy array.
        val (str):  Value to search in the array.
        multi_index (bool): Tells whether to return single index or all indices.

        [Return Value]
        int (default) - index of where the value was found in the array.
        [int] - all the indices of where the value was found in the array.
        None - When value can't be found in the array.
        -----------------------------------------------------------------------------------'''
        # Find the exact match of the 'val'
        contains = np.array([val in item.split('|') for item in array])
        idx_array = np.where(contains)[0]

        if 0 == len(idx_array): return None
        if multi_index: return idx_array

        return idx_array[0]
    
    def _validate_tools_group(self, tools_group):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a tools group name, validate whether the allele validator supports this
        tool group.

        [Parameters]
        tool_group (str):   Tools group name. (ex. "mhci", "mhcii")

        [Return Value]
        bool - Returns 'True' if it's a valid tools group. Else, returns 'False'.
        -----------------------------------------------------------------------------------'''
        if tools_group.lower() in self.tools_method_info['tool'].values:
            return True
        
        return False
    
    def _validate_method(self, method, version=None, tools=None):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a tools group name, validate whether the allele validator supports this
        method.

        [Parameters]
        method (str):   Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
        version (str):  Version of the tool (ex. "4.1")
        tool (str):   Tools group name. (ex. "mhci", "mhcii", etc.)

        [Return Value]
        bool - Returns 'True' if it's a valid method. Else, returns 'False'.
        -----------------------------------------------------------------------------------'''
        info_df = self.tools_method_info
        is_valid = False

        if tools:
            info_df = info_df[(info_df['tool'])==tools]

        if method in info_df['method'].values:
            is_valid = True

        if version:
            if float(version) in info_df['version'].values:
                is_valid = True
            else:
                is_valid = False
            
        return is_valid
    
    def _split_method_and_version(self, method, version):
        '''-----------------------------------------------------------------------------------
        [Description]
        If the 'method' string is given a format 'METHOD_NAME-VERSION', then it will split
        the 'method' string into method name and version. This version that was part of the
        method string will take precedence over the version that was provided as the parameter.

        [Parameters]
        method (str):   Name of the method (ex. "netmhcpan" or "netmhcpan-4.1").
        version (str):  Version of the tool (ex. "4.1").

        [Return Value]
        pd.DataFrame, error_msg - Returns pandas DataFrame object that was filtered given
                                  'method'/'version'/'tools_group'. If successfully filtered,
                                  the 'error_msg' should be an empty string.
        None, error_msg - Returns 'None' if there was some incongruence during the validation
                          steps of 'method'/'version'/'tools_group'. The 'error_msg' will be
                          a string hinting what might have gone wrong.
        -----------------------------------------------------------------------------------'''
        splitted_parts = method.split('-')
        _method = None
        _version = None

        if 1 < len(splitted_parts):
            _method = splitted_parts[0]
            _version = splitted_parts[1]
        else:
            _method = splitted_parts[0]
            _version = version

        return _method, _version
    
    def _filter_dataframe(self, df, method=None, version=None, tools_group=None):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'method' and/or version'on and/or 'tools_group', it will filter the given
        dataframe.

        [Parameters]
        df (list):  DataFrame to filter (Really it's the tools-mapping dataframe).
        method (str):   Name of the method (ex. "netmhcpan" or "netmhcpan-4.1").
        version (str):  Version of the tool (ex. "4.1").
        tool_group (str):   Tools group name (ex. "mhci", "mhcii", etc.).

        [Return Value]
        pd.DataFrame, error_msg - Returns pandas DataFrame object that was filtered given
                                  'method'/'version'/'tools_group'. If successfully filtered,
                                  the 'error_msg' should be an empty string.
        None, error_msg - Returns 'None' if there was some incongruence during the validation
                          steps of 'method'/'version'/'tools_group'. The 'error_msg' will be
                          a string hinting what might have gone wrong.
        -----------------------------------------------------------------------------------'''
        error_msg = ''

        # Validate parameters: method/version/tools_group
        if tools_group:
            if not self._validate_tools_group(tools_group):
              error_msg = f"Provided tools_group name ({tools_group}) is not a valid name"
              return None, error_msg
        
        # Validate method and version
        if method:
            '''
            From Gitlab Issue #743, it was requested that method value may accept either the
            name alone or name with a version delimited by '-'.
            ex) Valid input: 
                method: netmhciipan 
                version: 4.3
                
                method: netmhciipan-4.3
            Thus, adding method to handle the latter case.
            '''
            method, version = self._split_method_and_version(method, version)

            if not self._validate_method(method, version, tools_group):
                if version: 
                    error_msg = f'Please verify the provided method({method}-{version}).'
                else:
                    error_msg = f'Please verify the provided method({method}).'
                return None, error_msg

        # Filter dataframe by tools and method: tools-mapping (mro-df)
        if tools_group:
            df = df[df["Tool Group"] == tools_group]
        
        if method:
            # Original Tools_MRO_mapping file has 'netmhcpan_el' and 'netmhcpan_ba' to both refer to 'netmhcpan'
            if 'netmhcpan' in method : method = 'netmhcpan'
            if 'netmhciipan' in method : method = 'netmhciipan'

            df = df[df["Tool"] == method]

        if version :
            df = df[df["Tool Version"] == float(version)]

        return df.reset_index(drop=True), error_msg


    # ==========================================================
    # Allele Validator Functions Start Here
    # ==========================================================
    def convert_methodlabel_to_iedblabel(self, method_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'method label (tool label)', it will find the corresponding 'IEDB Label' from
        the 'mhc_alleles' file.

        [Parameters]
        method_label (str):  Tool label that each executable can accept.

        [Return Value]
        str - When a single method label is given, it returns the corresponding IEDB Label as string.
        [str] - When list of method labels are given, it returns list of corresponding IEDB Labels.
        None - If MRO ID can't be found for the given method label, then returns None.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"] # mhc_alleles
        mroid_array = mol_df['MRO ID'].values
        is_input_str = False
        iedb_labels = []

        if self.is_single_data(method_label) :
            method_label = [method_label]
            is_input_str = True
        
        for label in method_label:
            mroid = self._convert_methodlabel_to_mroid(label)
            if mroid is None: 
                iedb_labels.append(None)
                continue
            
            idx = self._get_indices(mroid_array, mroid)
            if idx is None:
                iedb_labels.append(None)
                continue

            iedb_labels.append(mol_df.loc[idx, 'IEDB Label'])
        
        if is_input_str: iedb_labels = iedb_labels[0]

        return iedb_labels

                
    def convert_iedblabel_to_methodlabel(self, iedb_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'IEDB label', it will find the corresponding 'Tool Label' from
        the 'tools-mapping' file.

        [Parameters]
        iedb_label (str):  IEDB label of a tool label that each executable can accept.

        [Return Value]
        str - When a single IEDB label is given, it returns the corresponding Tool Label as string.
        [str] - When list of IEDB labels are given, it returns list of corresponding Tool Labels.
        None - If MRO ID can't be found for the given IEDB label, then returns None.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        mroid_array = mro_df['MRO ID'].values
        is_input_str = False
        tool_labels = []

        if self.is_single_data(iedb_label) :
            iedb_label = [iedb_label]
            is_input_str = True

        for label in iedb_label:
            mroid = self._convert_iedblabel_to_mroid(label)
            if mroid is None: 
                tool_labels.append(None)
                continue

            idx = self._get_indices(mroid_array, mroid)
            if idx is None:
                tool_labels.append(None)
                continue

            tool_labels.append(mro_df.loc[idx, 'Tool Label'])

        if is_input_str: tool_labels = tool_labels[0]

        return tool_labels
    
    def _convert_iedblabel_to_methodlabel(self, iedb_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Same as 'convert_iedblabel_to_methodlabel'. This is a stripped down version where it 
        doesn't perform extra validation. This is mainly used for other functions within
        AlleleValidator that relys on this code to reduce potential small overhead.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        mroid_array = mro_df['MRO ID'].values

        mroid = self._convert_iedblabel_to_mroid(iedb_label)
        if mroid is None: return None

        idx = self._get_indices(mroid_array, mroid)
        if idx is None: return None

        return mro_df.loc[idx, 'Tool Label']

    def convert_iedblabel_to_mroid(self, iedb_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'IEDB label', it will find the corresponding 'MRO ID' from
        the 'mhc_alleles' file.

        [Parameters]
        iedb_label (str):  IEDB label of a tool label that each executable can accept.

        [Return Value]
        str - When a single IEDB label is given, it returns the corresponding MRO ID as string.
        [str] - When list of IEDB labels are given, it returns list of corresponding MRO IDs.
        None - If MRO ID can't be found for the given IEDB label, then returns None.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"] # mhc_alleles
        iedb_label_array = mol_df['IEDB Label'].values
        is_input_str = False
        mroid_list = []

        if self.is_single_data(iedb_label) :
            iedb_label = [iedb_label]
            is_input_str = True

        for label in iedb_label:
            idx = self._get_indices(iedb_label_array, label)
            if idx is None: 
                mroid_list.append(None)
                continue
            
            mroid_list.append(mol_df.loc[idx, 'MRO ID'])
        
        if is_input_str: mroid_list = mroid_list[0]

        return mroid_list
    
    def _convert_iedblabel_to_mroid(self, iedb_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Same as 'convert_iedblabel_to_mroid'. This is a stripped down version where it doesn't
        perform extra validation. This is mainly used for 'convert_iedblabel_to_methodlabel'
        to reduce potential small overhead.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"] # mhc_alleles
        iedb_label_array = mol_df['IEDB Label'].values
        # mro_df = self.data['mro_data']
        # iedb_label_array = mro_df['Tool Label'].values
        idx = self._get_indices(iedb_label_array, iedb_label)
        if idx is None: return None
        
        return mol_df.loc[idx, 'MRO ID']
    
    def convert_mroid_to_iedblabel(self, mroid):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'MRO ID', it will find the corresponding 'IEDB Label' from
        the 'mhc_alleles' file.

        [Parameters]
        mroid (str):  MRO ID of a tool label that each executable can accept.

        [Return Value]
        str - When a single MRO ID is given, it returns the corresponding IEDB Label as string.
        [str] - When list of MRO ID are given, it returns list of corresponding MRO IDs.
        None - If MRO ID can't be found for the given IEDB label, then returns None.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"] # mhc_alleles
        mroid_array = mol_df['MRO ID'].values
        is_input_str = False
        iedb_labels = []

        if self.is_single_data(mroid) :
            mroid = [mroid]
            is_input_str = True
        
        for each_mroid in mroid:
            idx = self._get_indices(mroid_array, each_mroid)
            if idx is None:
                iedb_labels.append(None)
                continue

            iedb_labels.append(mol_df.loc[idx, 'IEDB Label'])

        if is_input_str: iedb_labels = iedb_labels[0]

        return iedb_labels
    
    def convert_methodlabel_to_mroid(self, method_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'Tool Label (method label)', it will find the corresponding 'MRO ID' from
        the 'tools-mapping' file.

        [Parameters]
        method_label (str):  Tool label that each executable can accept.

        [Return Value]
        str - When a single Tool Label is given, it returns the corresponding MRO ID as string.
        [str] - When list of Tool Label are given, it returns list of corresponding MRO IDs.
        None - If MRO ID can't be found for the given Tool Label, then returns None.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        tool_label_array = mro_df['Tool Label'].values
        is_input_str = False
        mroids = []

        if self.is_single_data(method_label) :
            method_label = [method_label]
            is_input_str = True
        
        for label in method_label:
            idx = self._get_indices(tool_label_array, label)
            if idx is None:
                mroids.append(None)
                continue

            mroids.append(mro_df.loc[idx, 'MRO ID'])

        if is_input_str: mroids = mroids[0]

        return mroids
    
    def _convert_methodlabel_to_mroid(self, method_label):
        '''-----------------------------------------------------------------------------------
        [Description]
        Same as 'convert_methodlabel_to_mroid'. This is a stripped down version where it doesn't
        perform extra validation. This is mainly used for 'convert_methodlabel_to_iedblabel'
        to reduce potential small overhead.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        tool_label_array = mro_df['Tool Label'].values
        idx = self._get_indices(tool_label_array, method_label)
        if idx is None: return None

        return mro_df.loc[idx, 'MRO ID']
    
    def convert_mroid_to_methodlabel(self, mroid):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'MRO ID', it will find the corresponding 'IEDB Label' from
        the 'mhc_alleles' file.

        [Parameters]
        mroid (str):  MRO ID of a tool label that each executable can accept.

        [Return Value]
        str - When a single MRO ID is given, it returns the corresponding IEDB Label as string.
        [str] - When list of MRO ID are given, it returns list of corresponding MRO IDs.
        None - If MRO ID can't be found for the given IEDB label, then returns None.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        mroid_array = mro_df['MRO ID'].values
        is_input_str = False
        tool_labels = []

        if self.is_single_data(mroid) :
            mroid = [mroid]
            is_input_str = True
        
        for each_mroid in mroid:
            idx = self._get_indices(mroid_array, each_mroid)
            if idx is None: 
                tool_labels.append(None)
                continue

            tool_labels.append(mro_df.loc[idx, 'Tool Label'])
        
        if is_input_str: tool_labels = tool_labels[0]

        return tool_labels

    def convert_synonym_to_iedblabel(self, synonym):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a synonym, it will find the corresponding 'IEDB Label' from
        the 'mhc_alleles' file.

        [Parameters]
        synonym (str):  Allele label that was given that is not a IEDB Label nor a tool labe.

        [Return Value]
        str - When a single synonym is given, it returns the corresponding IEDB Label as string.
        [str] - When list of synonyms are given, it returns list of corresponding IEDB Labels.
        None - If IEDB Label can't be found for the given synonym, then returns None.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"]
        synonym_col_array = mol_df['Synonyms'].values
        is_input_str = False
        iedb_labels = []

        if self.is_single_data(synonym) :
            synonym = [synonym]
            is_input_str = True

        for each_synonym in synonym:
            # Find the exact match of the synonym from the synonym col
            # idx = self._get_indices(synonym_col_array, each_synonym)
            idx = None
            if idx is None: 
                # Try finding the synonym as a substring
                # e.g. 'HLA-DRB5*0201' is a substring of 'HLA-DRB5*0201|DRB5*02:02'
                idx = self._get_indices_substr(synonym_col_array, each_synonym)

            # If there aren't any matches found, then there's no corresponding
            # IEDB label.
            if idx is None:
                '''
                From Gitlab issue #743, it was requested to return itself if no
                synonym was found.

                UPDATE: From recent discussion (01/21/2025), this should return
                None if there are no match.
                '''
                iedb_labels.append(None)
                continue
        
            iedb_labels.append(mol_df.loc[idx, 'IEDB Label'])

        if is_input_str: iedb_labels = iedb_labels[0]
        
        return iedb_labels
    
    def get_default_method_version(self, method):
        '''-----------------------------------------------------------------------------------
        [Description]
        Given a 'method' or 'METHOD_NAME-VERSION' format, it will return the default version of
        the method as string.

        [Parameters]
        method (str):   Name of the method (ex. "netmhcpan" or "netmhcpan-4.1").

        [Return Value]
        str - Returns the default version of the method as string.
        -----------------------------------------------------------------------------------'''
        # If user submits "method-version" format, just take the method name.
        # Version information can be discarded.
        method, version = self._split_method_and_version(method, None)

        filtered_df = self.tools_method_info[
            (self.tools_method_info['method']==method) & 
            (self.tools_method_info['default_version']==True)
            ]
        
        # Will always have a single row in the df
        version = list(filtered_df['version'].values)[0]
        
        return str(version)

    
    def get_available_lengths(self, iedb_label, method, version=None, tools_group=None):
        '''-----------------------------------------------------------------------------------
        
        [Description]\n
          Given allele (IEDB label) and it's method, it will give out available lengths.

        [Parameters]
          - iedb_label : str or [str] 
            Single allele (IEDB Label) should be passed as type string.
            Else, list of strings containing allele label (IEDB Label).
          - method : str
            Name of the method.

        [Return Value] : [str] or [[str]]\n
          Single allele will return list of available lengths for the given allele.
          For multiple alleles, it will return multiple lists, each corresponding to their 
          relevant alleles.

        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"]
        is_input_str = False
        lengths = []

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self._filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        # When input is a single string, turn it into list
        if self.is_single_data(iedb_label):
            is_input_str = True
            iedb_label = [iedb_label]

        for label in iedb_label:
            mroid = self._convert_iedblabel_to_mroid(label)
            mroid_array = mro_df['MRO ID'].values
            idx = self._get_indices(mroid_array, mroid)
            if idx is None: 
                lengths.append(None)
                continue

            available_lengths = mro_df.loc[idx, 'Lengths']
            lengths.append(available_lengths)
        
        if is_input_str: lengths = lengths[0]

        return lengths


    def get_available_alleles(self, method=None, version=None, tools_group=None):
        '''-----------------------------------------------------------------------------------
        [Description]\n
        Given method/version and/or tools group, it will retrieve all available alleles (IEDB Labels).

        [Parameters] :
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)

        [Return Value] : [str]\n
        List of all unique IEDB labels for given method/version/tools_group.
        -----------------------------------------------------------------------------------'''
        mol_df = self.data["molecule_data"] # mhc_alleles
        mro_df = self.data["mro_data"] # tools-mapping

        # Return all allele names if no parameters are given
        if not (method or tools_group) :
            return mol_df["IEDB Label"].unique().tolist()

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self._filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        available_iedb_labels = []
        for row in mro_df.itertuples():
            mroid = row._5
            mroid_array = mol_df['MRO ID'].values
            idx = self._get_indices(mroid_array, mroid)
            if idx is None: return None

            iedb_label = mol_df.loc[idx, 'IEDB Label']
            available_iedb_labels.append(iedb_label)
        
        return available_iedb_labels

    # pretty much identify_alleles
    def get_allele_info(self, iedb_label) :
        '''-----------------------------------------------------------------------------------
        [Description] :\n
        Given allele (IEDB label), it will provide relevant information about the allele,
        such as 'IEDB Label', 'Tool Label', 'Synonyms', 'MRO ID', and 'Tools Info'.
        ex)
        {
            'IEDB Label': 'HLA-A*02:01',
            'Tool Label': 'HLA-A0201',
            'Synonyms': 'HLA-A2.1,HLA-A*020101,HLA-A02:01,HLA-A0201',
            'MRO ID': 'MRO:0001007',
            'Tools Info': {
                {
                    'Group': 'mhci',
                    'Tool': 'immunogenicity',
                    'Version': '1.0',
                    'Lengths': ''
                },
                {
                    'Group': 'pvc',
                    'Tool': 'icerfire',
                    'Version': '1.0',
                    'Lengths': '8,9,10,11,12,13,14'
                }
            },
        }

        [Parameters] :
          - iedb_label : str or [str]
            Single IEDB Label or list of IEDB Labels.
         
        [Return Value]
        dict - Dictionary that holds relevant information regarding the provided IEDB Label.
        [dict] - List of dictionaries where each dictionary is related to single IEDB Label
                 provided by the user. The order of dictionary matches the list of labels
                 given by the user.
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        mol_df = self.data["molecule_data"]
        is_input_str = False
        allele_info_list = []
        

        # When input is a single string, turn it into list
        if self.is_single_data(iedb_label):
            is_input_str = True
            iedb_label = [iedb_label]
        
        iedb_label_array = mol_df['IEDB Label'].values
        mroid_array = mro_df['MRO ID'].values

        for label in iedb_label:
            allele_info_dict = {
                'IEDB Label': label,
                'Tool Label': self._convert_iedblabel_to_methodlabel(label),
                'MRO ID': self._convert_iedblabel_to_mroid(label),
                'Synonyms': '',
                'Tools Info': []
            }

            idx = self._get_indices(mroid_array, allele_info_dict['MRO ID'], multi_index=True)
            if idx is None: return None
            
            # Can have more than 1 record
            filtered_tm_df = mro_df.loc[idx]
            filtered_tm_df.columns = filtered_tm_df.columns.str.replace(' ', '_', regex=False)

            for row in filtered_tm_df.itertuples():
                allele_info_dict['Tools Info'].append({
                    'Group': row.Tool_Group,
                    'Tool': row.Tool,
                    'Version': row.Tool_Version,
                    'Lengths': row.Lengths
                })

            '''Add Synonyms to the allele info dict'''
            idx = self._get_indices(iedb_label_array, label)
            mol_row = mol_df.loc[idx]

            allele_info_dict['Synonyms'] = mol_row['Synonyms']

            allele_info_list.append(allele_info_dict)
        
        if is_input_str: allele_info_list = allele_info_list[0]

        return allele_info_list


    def validate_alleles(self, iedb_label, method=None, version=None, tools_group=None):
        '''-----------------------------------------------------------------------------------
        [Description] :\n
        Given allele (IEDB label), it will return a boolean to tell whether the label is a
        valid (supported) allele for the method.

        [Parameters] :
          - iedb_label : list
            List of strings containing "IEDB Label" that supposed to match "IEDB Label"
            column from "mro-dev.tsv".
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)

        [Return Value]
        bool - Returns 'True' if the IEDB Label was found given method
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        mol_df = self.data["molecule_data"] # mhc_alleles
        validity = []
        is_input_str = False

        # Basic paremeter checks + filters dataframe according to the parameters
        mro_df, err = self._filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        # When input is a single string, turn it into list
        if self.is_single_data(iedb_label):
            is_input_str = True
            iedb_label = [iedb_label]

        for label in iedb_label:
            target_df = mol_df[(mol_df['IEDB Label']==label)]
            
            if 0 < len(target_df):
                # Retrieve the MRO ID for the IEDB Label
                mroid = target_df.iloc[0]['MRO ID']
                
                # MRO dataframe will contain entries that only matches the method/verison/tools_group
                if mroid in mro_df['MRO ID'].to_list():
                    validity.append(True)
                    continue

            validity.append(False)
        
        # Make sure to return the correct type (same type as the input)
        if is_input_str: validity = validity[0]

        return validity


    def validate_allele_lengths(self, iedb_label, lengths, method=None, version=None, tools_group=None):
        '''-----------------------------------------------------------------------------------
        [Description] :\n
        Given allele (IEDB label), it will list out valid dictionary (containing valid alleles/lengths)
        and invalid dictionary (containing invalid allele/lengths combination).

        [Parameters] :
          - iedb_label : list
            List of strings containing "IEDB Label" that suppose to match "IEDB Label"
            column from "mro-dev.tsv".
          - lengths: list
            List of lengths in int or str.
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)

        [Return Value]
        dict, dict - Returns valid dictionary (containing valid alleles/lengths) and 
                     invalid dictionary (containing invalid allele/lengths combination).
        -----------------------------------------------------------------------------------'''
        mro_df = self.data["mro_data"] # tools-mapping
        valid_dict = {}
        invalid_dict = {}

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self._filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        # When input is a single string, turn it into list
        if self.is_single_data(iedb_label):
            iedb_label = [iedb_label]
        
        if not isinstance(lengths, list):
            lengths = [lengths]

        # Make sure all the lengths is in str
        lengths = [str(_) for _ in lengths]

        for label in iedb_label:
            mroid = self._convert_iedblabel_to_mroid(label)
            mroid_array = mro_df['MRO ID'].values
            idx = self._get_indices(mroid_array, mroid)
            if idx is None: continue
            tool_label_lengths = [str(_).strip() for _ in mro_df.loc[idx, 'Lengths'].split(',')]

            # valid lengths that the user didn't specify
            # print(set(tool_label_lengths) - set(lengths))

            # valid lengths that the user provided
            correct_lengths = list(set(tool_label_lengths) & set(lengths))
            correct_lengths.sort(key=int)

            # invalid lengths that the user specified
            incorrect_lengths = list(set(lengths) - set(tool_label_lengths))
            incorrect_lengths.sort(key=int)
            
            if correct_lengths:
                valid_dict[label] = correct_lengths
            
            if incorrect_lengths:
                invalid_dict[label] = incorrect_lengths

        return valid_dict, invalid_dict
    

    def identify_label(self, allele) :
        # Use convert methods to deduce the allele's label
        converted_allele = self._convert_iedblabel_to_methodlabel(allele)
        if converted_allele:
          return "iedb_label"
        
        converted_allele = self.convert_methodlabel_to_iedblabel(allele)
        if converted_allele:
          return "tool_label"
        
        converted_allele = self.convert_synonym_to_iedblabel(allele)
        if converted_allele:
          return "synonym"
        
        return "Unknown"


