import os
import time
import warnings
import pickle
import pandas as pd
from pathlib import Path

def read_pickle_file(file_name):
    """Load the table stored at *file_name* and return it as a DataFrame.

    Despite the historical name, the file is read with ``pd.read_feather``;
    nothing is unpickled here.
    """
    return pd.read_feather(file_name)


class AlleleValidator:
    # ==============================================================================================================
    # Constructor
    # ==============================================================================================================
    def __init__(self):
        '''-----------------------------------------------------------------------------------
        Description :
          Load every reference table the validator works with. All paths are resolved
          relative to the folder that contains this module: the feather tables are read
          from "pickles/" and the method workbook from "data/".

        Attributes set :
          tools_method_info (DataFrame) - Result of _get_tools_method_info().
          data (dict) - Holds "mro_data", "molecule_data", "autocomp_data",
                        "allele_names", "last_label", "available_mhci_methods" and
                        "available_mhcii_methods".
        -----------------------------------------------------------------------------------'''
        # Folder locations
        module_dir = str(Path(__file__).resolve().parents[0])
        data_dir = module_dir + "/data"
        pickles_dir = module_dir + "/pickles"

        # File locations
        mapping_path = pickles_dir + '/' + 'tools_mapping_info.feather'
        molecule_path = pickles_dir + '/' + 'molecule_info.feather'
        autocomp_path = pickles_dir + '/' + 'autocomp_data_info.feather'
        method_table_path = data_dir + '/' + 'method-table.xlsx'

        self.data = {"molecule_data": None, "mro_data": None, "autocomp_data": None}
        self.tools_method_info = self._get_tools_method_info(method_table_path)

        # Tools-mapping table
        self.data["mro_data"] = read_pickle_file(mapping_path)

        # Molecule table (the very same object is stored in self.data and edited below)
        molecules = read_pickle_file(molecule_path)
        self.data["molecule_data"] = molecules

        # Names offered by allele autosuggestion: every label plus every synonym.
        labels = molecules["IEDB Label"].squeeze().dropna().unique().tolist()
        molecules["Synonyms"] = molecules["Synonyms"].fillna("")

        synonyms = []
        for joined in molecules["Synonyms"].squeeze().dropna().unique().tolist():
            # Each cell packs its synonyms into one "|"-separated string.
            synonyms.extend(joined.split("|"))

        self.data["allele_names"] = labels + synonyms
        self.data["last_label"] = labels[-1]

        # Autocomplete datasource
        self.data["autocomp_data"] = read_pickle_file(autocomp_path)

        # One DataFrame per method sheet of the workbook
        for data_key, sheet in (("available_mhci_methods", "MHCI"),
                                ("available_mhcii_methods", "MHCII")):
            self.data[data_key] = pd.read_excel(
                r'{}/method-table.xlsx'.format(data_dir), sheet_name=sheet, engine='openpyxl')

    # ==============================================================================================================
    # Helper Functions
    # ==============================================================================================================
    def is_single_data(self, allele):
        if isinstance(allele, str):
            return True
        elif isinstance(allele, list):
            return False
        else:
            raise TypeError(
                "Please provide string type for single allele, and list of strings for multiple alleles.")

    def compare_lengths(self, sample_lengths, original_lengths):
        valid_lengths = []
        invalid_lengths = []

        for length in sample_lengths :
            if length in original_lengths :
                valid_lengths.append(length)
            else :
                invalid_lengths.append(length)
        
        return valid_lengths, invalid_lengths
    
    
    def _get_tools_method_info(self, data_path):
        '''-----------------------------------------------------------------------------------
        Description :
          Given a path to the Excel file that lists the available methods, read every
          sheet and stack them into one DataFrame. Each row is tagged with the
          lower-cased name of the sheet it came from in an extra 'tool' column:
          ex) | method | version | default_version | tool |
              |--------+---------+-----------------+------|
              |ann     |4.0      |True             |mhci  |
              |ann     |4.0      |True             |processing  |
              ...

        Parameters :
          data_path (str) - Path to the method data file (method-table.xlsx)

        Return Value :
          (DataFrame) - All sheets concatenated under a fresh 0..n-1 index, with the
                        additional 'tool' column. Empty DataFrame if there is no sheet.
        -----------------------------------------------------------------------------------'''
        # sheet_name=None makes read_excel return a {sheet name: DataFrame} mapping.
        sheets = pd.read_excel(data_path, sheet_name=None, engine='openpyxl')

        # Tag each sheet, then concatenate once: calling concat inside the loop
        # re-copies all previously accumulated rows on every iteration.
        tagged_sheets = [
            sheet_df.assign(tool=sheet_name.lower())
            for sheet_name, sheet_df in sheets.items()
        ]

        if not tagged_sheets:
            # pd.concat refuses an empty list; keep returning an empty frame instead.
            return pd.DataFrame()

        return pd.concat(tagged_sheets, ignore_index=True)

    def _validate_tools_group(self, tools_group):
        '''-----------------------------------------------------------------------------------
        Description :
          Given a tools group name, validate whether the allele validator supports this
          tool group.

        Parameters :
          tool_group (str) - Tools group name. (ex. "mhci", "mhcii")

        Return Value :
          (bool) - True/False
        -----------------------------------------------------------------------------------'''
        if tools_group.lower() in self.tools_method_info['tool'].values:
            return True
        
        return False
    
    def _validate_method(self, method, version=None, tools=None):
        '''-----------------------------------------------------------------------------------
        Description :
          Given a tools group name, validate whether the allele validator supports this
          tool group.

        Parameters :
          iedb_label (list) - List of strings containing "IEDB Label" that suppose to match 
                              "IEDB Label" column from "mro-dev.tsv".
          method (str) - Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          version (str) - Version of the tool (ex. "4.1")
          tool_group (str) - Tools group name. (ex. "mhci", "mhcii", etc.)

        Return Value :
          (bool) - True/False
        -----------------------------------------------------------------------------------'''
        info_df = self.tools_method_info
        is_valid = False

        if tools:
            info_df = info_df[(info_df['tool'])==tools]

        if method in info_df['method'].values:
            is_valid = True

        if version:
            if float(version) in info_df['version'].values:
                is_valid = True
            else:
                is_valid = False
            
        return is_valid


    # ==============================================================================================================
    # Allele-Validator Methods
    # ==============================================================================================================
    def filter_dataframe(self, df, method=None, version=None, tools_group=None):
        error_msg = ''

        # Validate parameters: method/version/tools_group
        if tools_group:
            if not self._validate_tools_group(tools_group):
              error_msg = f"Provided tools_group name ({tools_group}) is not a valid name"
              return None, error_msg
        
        # Validate method and version
        if method:
            if not self._validate_method(method, version, tools_group):
                error_msg = f'Please verify the provided method({method}).'
                return None, error_msg
                
        # Filter dataframe by tools and method
        if tools_group:
            df = df[df["Tool Group"] == tools_group]

        if method:
            # Original Tools_MRO_mapping file has 'netmhcpan_el' and 'netmhcpan_ba' to both refer to 'netmhcpan'
            if 'netmhcpan' in method : method = 'netmhcpan'

            df = df[df["Tool"] == method]

        if version :
            df = df[df["Tool Version"] == float(version)]

        return df.reset_index(drop=True), error_msg


    def validate_alleles(self, iedb_label, method=None, version=None, tools_group=None):
        '''======================================================================================

        Description :\n
          Given allele (IEDB label), it will list out valid dictionary (containing valid alleles/lengths)
          and invalid dictionary (containing invalid allele/lengths combination).

        Parameters :
          - iedb_label : list
            List of strings containing "IEDB Label" that suppose to match "IEDB Label"
            column from "mro-dev.tsv".
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)

        Return Value : dictionaries\n
          Valid and Invalid dictionaries

        ======================================================================================'''
        mro_df = self.data["mro_data"]
        validity = []

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)
        
        if err: raise ValueError(err)

        # Extract 'IEDB Label' as series
        mro_iedb_series = mro_df["IEDB Label"]

        # If iedb_label is a single data (str)
        if self.is_single_data(iedb_label):
            if iedb_label in mro_iedb_series.values:
                return True
            else:
                return False

        # At this point, iedb_label is multiple values
        for each_allele in iedb_label:
            if each_allele in mro_iedb_series.values:
                validity.append(True)
            else:
                validity.append(False)
                
        return validity

    def validate_alleles_v2(self, iedb_label, method=None, version=None, tools_group=None):
        mro_df = self.data["mro_data"] # tools-mapping
        mol_df = self.data["molecule_data"] # mhc_alleles

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        # single data: 'iedb_label' is a string
        if self.is_single_data(iedb_label):
            target_df = mol_df

            if tools_group:
                target_df = mol_df[(mol_df['IEDB Label']==iedb_label) & (mol_df['Tool Group']==tools_group)]
                # print(target_df)
            
            if method:
                # Retrieve the MRO ID for the IEDB Label
                mroid = target_df['MRO ID'].to_list()[0]
                    
                if version:
                    # Use 'MRO ID' to search for the allele in tools-mapping file
                    target_df = mro_df[(mro_df['MRO ID']==mroid) & (mro_df['Tool']==method) & (mro_df['Tool Version']==version)]
                else:
                    # Use 'MRO ID' to search for the allele in tools-mapping file
                    target_df = mro_df[(mro_df['MRO ID']==mroid) & (mro_df['Tool']==method)]
            
            if not tools_group and not method:
                target_df = mol_df[(mol_df['IEDB Label']==iedb_label)]
            
            # print(target_df)

            if 0 < len(target_df):
                return True
            
        return False




    def validate_allele_lengths(self, iedb_label, lengths, method=None, version=None, tools_group=None):
        '''======================================================================================

        Description :\n
          Given allele (IEDB label), it will listout valid dictionary (containing valid alleles/lengths)
          and invalid dictionary (containing invalid allele/lengths combination).

        Parameters :
          - iedb_label : list
            List of strings containing "IEDB Label" that suppose to match "IEDB Label"
            column from "mro-dev.tsv".
          - lengths : [str]
            List of integers.
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)
          

        Return Value : dictionaries\n
          Valid and Invalid dictionaries

        ======================================================================================'''
        mro_df = self.data["mro_data"] # tools-mapping
        mol_df = self.data["molecule_data"] # mhc_alleles

        valid_dict = {}
        invalid_dict = {}

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        # Extract 'IEDB Label' as series
        mro_iedb_series = mro_df["IEDB Label"]

        # single data
        if self.is_single_data(iedb_label):
            if iedb_label in mro_iedb_series.values:
                # Get index of the allele
                target_idx = mro_df["IEDB Label"].loc[lambda x: x == iedb_label].index[0]

                # Get the correlated lengths
                target_lengths = mro_df["Lengths"][target_idx]
                target_lengths = [str(length) for length in target_lengths.split(",")]
                    
                valid_lengths, invalid_lengths = self.compare_lengths(lengths, target_lengths)
                
                if valid_lengths:
                    valid_dict[iedb_label] = valid_lengths
                if invalid_lengths:
                    invalid_dict[iedb_label] = invalid_lengths
                return valid_dict, invalid_dict
            else :
                invalid_dict[iedb_label] = lengths
                # raise ValueError("Please provide a correct IEDB Label.")

        # if iedb_label not in mro_iedb_series.values : return False
        for each_allele in iedb_label:
            if each_allele in mro_iedb_series.values:
                # Get index of the allele
                target_idx = mro_df["IEDB Label"].loc[lambda x: x == each_allele].index[0]

                # Get the correlated lengths
                target_lengths = mro_df["Lengths"][target_idx]
                target_lengths = [str(length) for length in target_lengths.split(",")]

                valid_lengths, invalid_lengths = self.compare_lengths(lengths, target_lengths)
                if valid_lengths:
                    valid_dict[each_allele] = valid_lengths
                if invalid_lengths:
                    invalid_dict[each_allele] = invalid_lengths
            else :
                invalid_dict[each_allele] = lengths
                # raise ValueError("Please provide a correct IEDB Label.")
                
        return valid_dict, invalid_dict


    def get_alleles(self, method=None, version=None, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given method or tools group, it will retrieve all available alleles (IEDB Label).

        Parameters :
          - tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)
          - method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          - version : str
            Version of the tool (ex. "4.1")
            
        Return Value : [str]\n
          List of allele names as strings.

        ======================================================================================'''
        mro_df = self.data["mro_data"]

        # Return all allele names if no parameters are given
        if not (method or tools_group) :
            alleles = list(mro_df["IEDB Label"].unique())
            return [_ for _ in alleles if str(_) != "nan"]

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        alleles = list(mro_df["IEDB Label"])
        alleles = [_ for _ in alleles if str(_) != "nan"]

        return alleles

    def convert_mroid_to_methodlabel(self, mro_ids, method, version=None, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given MRO ID and method, return the tools label.

        Parameters :
          mro_id : list of str
            List of MRO IDs from "mr-dev.tsv"
          method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          version : str
            Version of the tool (ex. "4.1")
          tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)
            
        Return Value : [str]\n
          List of strings (which are alleles ordered by the input) || list of None

        ======================================================================================'''
        # Make sure either mro_id or iedb_label is provided.
        if not mro_ids :
            raise ValueError("Please provide valid IEDB labels (ex. BoLA-1:02301).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(mro_ids) :
            methodlabel = None
            idx = mro_df.index[mro_df["MRO ID"] == mro_ids].tolist()
            
            if len(idx) != 0 :
                methodlabel = mro_df.loc[idx[0]]["Tool Label"]

        else :
            methodlabel = []
            for each_idlabel in mro_ids :
                idx = mro_df.index[mro_df["MRO ID"] == each_idlabel].tolist()
                
                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["Tool Label"]
                    methodlabel.append(label)
                else :
                    methodlabel.append(None)

        return methodlabel
    
    def _convert_mroid_to_iedblabel(self, mro_ids, method, version=None, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given MRO ID and method, return the tools label.

        Parameters :
          mro_id : list of str
            List of MRO IDs from "mr-dev.tsv"
          method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          version : str
            Version of the tool (ex. "4.1")
          tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)
            
        Return Value : [str]\n
          List of strings (which are alleles ordered by the input) || list of None

        ======================================================================================'''
        # Make sure either mro_id or iedb_label is provided.
        if not mro_ids :
            raise ValueError("Please provide valid IEDB labels (ex. BoLA-1:02301).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err: raise ValueError(err)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(mro_ids) :
            methodlabel = None
            idx = mro_df.index[mro_df["MRO ID"] == mro_ids].tolist()
            
            if len(idx) != 0 :
                methodlabel = mro_df.loc[idx[0]]["IEDB Label"]

        else :
            methodlabel = []
            for each_idlabel in mro_ids :
                idx = mro_df.index[mro_df["MRO ID"] == each_idlabel].tolist()
                
                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["IEDB Label"]
                    methodlabel.append(label)
                else :
                    methodlabel.append(None)

        return methodlabel
    
    def _convert_methodlabel_to_iedblabel(self, method_label):
        mro_df = self.data["mro_data"]
        iedblabel = None

        idx = mro_df.index[mro_df["Tool Label"] == method_label].tolist()

        if len(idx) != 0 : 
            iedblabel = mro_df.loc[idx[0]]["IEDB Label"]

        return iedblabel

    def _convert_methodlabel_to_mroid(self, method_label):
        mro_df = self.data["mro_data"]
        mroid = None

        idx = mro_df.index[mro_df["Tool Label"] == method_label].tolist()

        if len(idx) != 0 : 
            mroid = mro_df.loc[idx[0]]["MRO ID"]

        return mroid
    
    def _identify_unknown_allele(self, allele):
        import re
        mro_df = self.data["mro_data"]
        data = None

        clean_tool_labels = []
        clean_iedb_labels = []
        for row in mro_df.itertuples():
            clean_tool_labels.append(re.sub('\W+','', row._4))
            clean_iedb_labels.append(re.sub('\W+','', row._5))
            
            
        mro_df['clean_tool_label'] = clean_tool_labels
        mro_df['clean_iedb_label'] = clean_iedb_labels

        clean_allele = re.sub('\W+', '', allele)
        idx = mro_df.index[mro_df["clean_tool_label"] == clean_allele].tolist()
        if len(idx) != 0 : 
            mroid = mro_df.loc[idx[0]]["MRO ID"]
            iedblabel = mro_df.loc[idx[0]]["IEDB Label"]
            toollabel = mro_df.loc[idx[0]]["Tool Label"]
            data = (mroid, toollabel, iedblabel)
            return data
        
        idx = mro_df.index[mro_df["clean_iedb_label"] == clean_allele].tolist()
        if len(idx) != 0 : 
            mroid = mro_df.loc[idx[0]]["MRO ID"]
            iedblabel = mro_df.loc[idx[0]]["IEDB Label"]
            toollabel = mro_df.loc[idx[0]]["Tool Label"]
            data = (mroid, toollabel, iedblabel)
            
        return data


    def convert_methodlabel_to_iedblabel(self, method_labels, method, version=None, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given tool label, it will convert it to IEDB label.

        Parameters :
          method_labels : str or [str]
            String or list of Tool Label from "mro-dev.tsv"
          method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          version : str
            Version of the tool (ex. "4.1")
          tool_group : str
            Tools group name. (ex. "mhci", "mhcii", etc.)
            
        Return Value : str or [str]\n
          Single tool label will return string of corresponding IEDB Label.
          Single or multiple tool labels will return list of IEDB Labels. All unmatched ones
          will return None.

        ======================================================================================'''

        # Make sure either mro_id or iedb_label is provided.
        if not method_labels :
            raise ValueError("Please provide valid IEDB labels (ex. BoLA-1:02301).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err:
            raise ValueError(err)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(method_labels) :
            iedblabel = None
            idx = mro_df.index[mro_df["Tool Label"] == method_labels].tolist()
            
            if len(idx) != 0 :
                iedblabel = mro_df.loc[idx[0]]["IEDB Label"]

        else :
            iedblabel = []
            for each_idlabel in method_labels :
                idx = mro_df.index[mro_df["Tool Label"] == each_idlabel].tolist()
                
                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["IEDB Label"]
                    iedblabel.append(label)
                else :
                    iedblabel.append(None)

        return iedblabel


    def convert_iedblabel_to_methodlabel(self, iedb_labels, method, version=None, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given IEDB Label and method, return the tool label.

        Parameters :
          iedb_label : str or [str]
            String or list of IEDB Label from "mro-dev.tsv"
          method : str
            Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
          version : str
            Version of the tool (ex. "4.1")
          tool_group : str
            Tools group name. (ex. "mhci", "mhcii", "mhcnp", etc.)
          
        Return Value : [str]\n
          List of strings (which are alleles ordered by the input) || list of None

        ======================================================================================'''
        # Make sure either mro_id or iedb_label is provided.
        if not iedb_labels :
            raise ValueError("Please provide valid IEDB labels (ex. BoLA-1:02301).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err:
            raise ValueError(err)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(iedb_labels) :
            methodlabel = None
            idx = mro_df.index[mro_df["IEDB Label"] == iedb_labels].tolist()
            
            if len(idx) != 0 :
                methodlabel = mro_df.loc[idx[0]]["Tool Label"]

        else :
            methodlabel = []
            for each_idlabel in iedb_labels :
                idx = mro_df.index[mro_df["IEDB Label"] == each_idlabel].tolist()
                
                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["Tool Label"]
                    methodlabel.append(label)
                else :
                    methodlabel.append(None)

        return methodlabel

    def convert_methodlabel_to_mroid(self, tools_label, method, version=None, tools_group=None):
        '''======================================================================================
          
          Description :\n
            Given the tool label, it will output MRO ID.

          Data Sheet :       
            "mro-dev.tsv"
          
          Parameters :
            - tool_label : str or [str]
              String or list of IEDB Label from "mro-dev.tsv"
            - method : str
              Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
            - version : str
              Version of the tool (ex. "4.1")

          Return Value : str or [str]\n
            Single tool label will return MRO ID as string.
            Single or multiple tool labels provided as list will return corresponding MRO IDs 
            as list of strings. All undetermined ones will return None.

        ======================================================================================'''
        # Make sure tools_label is provided
        if not tools_label :
            raise ValueError("Please provide valid tools labels (ex. BoLA-1:00901).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err:
            return [None] * len(tools_label)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(tools_label) :
            mro_ids = None
            idx = mro_df.index[mro_df["Tool Label"] == tools_label].tolist()
            
            if len(idx) != 0 :
                mro_ids = mro_df.loc[idx[0]]["MRO ID"]

        else :
            mro_ids = []
            for each_label in tools_label :
                idx = mro_df.index[mro_df["Tool Label"] == each_label].tolist()

                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["MRO ID"]
                    mro_ids.append(label)
                else :
                    mro_ids.append(None)

        return mro_ids

    def convert_iedblabel_to_mroid(self, iedb_label, method, version=None, tools_group=None):
        '''======================================================================================
          
          Description :\n
            Given the IEDB label, it will output MRO ID.

          Data Sheet :       
            "mro-dev.tsv"
          
          Parameters :
            - iedb_label : str or [str]
              String or list of IEDB Label from "mro-dev.tsv"
            - method : str
              Name of the method (ex. "netmhcpan" or "netmhcpan-4.1")
            - version : str
              Version of the tool (ex. "4.1")

          Return Value : str or [str]\n
            Single IEDB label will return MRO ID as string.
            Single or multiple IEDB labels provided as list will return corresponding MRO IDs 
            as list of strings. All undetermined ones will return None.

        ======================================================================================'''

        # Make sure tools_label is provided
        if not iedb_label :
            raise ValueError("Please provide valid IEDB labels (ex. BoLA-1:00901).")

        mro_df = self.data["mro_data"]
        
        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version, tools_group)

        if err:
            return [None] * len(iedb_label)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(iedb_label) :
            mro_ids = None
            idx = mro_df.index[mro_df["IEDB Label"] == iedb_label].tolist()
            
            if len(idx) != 0 :
                mro_ids = mro_df.loc[idx[0]]["MRO ID"]

        else :
            mro_ids = []
            for each_label in iedb_label :
                idx = mro_df.index[mro_df["IEDB Label"] == each_label].tolist()

                if len(idx) != 0 :
                    label = mro_df.loc[idx[0]]["MRO ID"]
                    mro_ids.append(label)
                else :
                    mro_ids.append(None)

        return mro_ids


    def convert_synonym_to_iedblabel(self, synonym, tools_group=None):
        '''======================================================================================
        
        Description :\n
          Given a synonym or list of synonyms, it will return the corresponding IEDB labels.
          If the synonym is already in IEDB label, it will simply return IEDB label.
          
          NOTE : Currently, there's no synonym, where synonym matches exactly the IEDB label.
          However, once tools label are added to the synonym, it will happen.

        Parameters :
          - synonym : str or [str] 
            Single synonym should be passed as type string.
            Else, list of strings containing synonyms.
          
        Return Value : str or [str]\n
          Single synonym will return the corresponding IEDB Label found in 'molecule-dev.tsv' as string.
          Single or multiple synonyms passed in as list will return corresponding IEDB Label as lists.
          Those that aren't able to be found, will be returned as None.

        ======================================================================================'''
        # Make sure tools_label is provided
        if not synonym :
            raise ValueError("Please provide valid synonym.")

        mol_df = self.data["molecule_data"]
        mol_header = list(mol_df.columns)
                
        # Basic paremeter check + filters dataframe according to the parameter
        mol_df, err = self.filter_dataframe(mol_df, tools_group=tools_group)

        if err: warnings.warn(err)

        ''' filter by mro_df by MRO ID / IEDB Label '''
        if self.is_single_data(synonym) :
            iedb_label = None

            # Check if there's a synonym and return it's IEDB label
            end_flag = 0
            for row in mol_df.itertuples(name=None):
                row_synonym = row[mol_header.index('Synonyms') + 1]
                
                if not row_synonym :
                    continue

                if synonym in row_synonym :
                    iedb_label = row[mol_header.index('IEDB Label') + 1]
                    end_flag = 1
                    break

            # If there's no synonym found, then try checking if the provided allele is already an IEDB Label
            if not end_flag :
              for row in mol_df.itertuples(name=None):
                  # print("synonym hasn't been found yet...")
                  row_iedb_label = row[mol_header.index('IEDB Label') + 1]

                  if synonym == row_iedb_label :
                      iedb_label = synonym

        else :
            iedb_label = []

            for each_synonym in synonym :
                corr_iedb_label = ''
                for row in mol_df.itertuples(name=None):
                    row_synonym = row[mol_header.index('Synonyms') + 1]
                    
                    if not row_synonym :
                        continue

                    if each_synonym in row_synonym :
                        corr_iedb_label = row[mol_header.index('IEDB Label') + 1]
                        break
                
                if corr_iedb_label :
                    iedb_label.append(corr_iedb_label)
                else :
                    iedb_label.append(None)

            for i in range(len(iedb_label)) :
                if iedb_label[i] != None : continue

                each_label = synonym[i]
                
                for row in mol_df.itertuples(name=None):
                    row_iedb_label = row[mol_header.index('IEDB Label') + 1]
                    
                    if each_label == row_iedb_label :
                        iedb_label[i] = each_label
                        break

        return iedb_label


    def get_available_lengths(self, iedb_label, method, version=None):
        '''======================================================================================
        
        Description :\n
          Given allele (IEDB label) and it's method, it will give out available lengths.

        Parameters :
          - iedb_label : str or [str] 
            Single allele (IEDB Label) should be passed as type string.
            Else, list of strings containing allele label (IEDB Label).
          - method : str
            Name of the method.

        Return Value : [str] or [[str]]\n
          Single allele will return list of available lengths for the given allele.
          For multiple alleles, it will return multiple lists, each corresponding to their 
          relevant alleles.

        ======================================================================================'''

        mro_df = self.data["mro_data"]

        # Basic paremeter check + filters dataframe according to the parameter
        mro_df, err = self.filter_dataframe(mro_df, method, version)

        # if err: raise ValueError(err)
        if err: raise ValueError(err)


        if self.is_single_data(iedb_label) :
            try :            
                # Get index of the allele
                target_idx = mro_df["IEDB Label"].loc[lambda x: x==iedb_label].index[0]

                # Get the correlated lengths
                lengths = mro_df["Lengths"][target_idx]
                lengths = [str(length) for length in lengths.split(",")]
            except IndexError :
                lengths = None
        
        else :
            lengths = []
            for each_allele in iedb_label :
                try :
                    # Get index of the allele
                    target_idx = mro_df["IEDB Label"].loc[lambda x: x==each_allele].index[0]

                    # Get the correlated lengths
                    length = mro_df["Lengths"][target_idx]
                    length = [str(_) for _ in length.split(",")]
                except IndexError :
                    length = None

                lengths.append(length)
        
        return lengths

    def get_allele_info(self, iedb_label) :
        mol_df = self.data["molecule_data"]

        if self.is_single_data(iedb_label) :  
            allele_info = None

            # Get index of the allele
            target_idx = mol_df["IEDB Label"].loc[lambda x: x==iedb_label].index[0]
            target_row = mol_df.loc[[target_idx]]
            allele_info = target_row.set_index("IEDB Label").T.to_dict('dict')
        else :
            allele_info = {}
            
            for each_label in iedb_label :
                target_idx = mol_df["IEDB Label"].loc[lambda x: x==each_label].index[0]
                target_row = mol_df.loc[[target_idx]]
                one_allele_info = target_row.set_index("IEDB Label").T.to_dict('dict')

                for k, v in one_allele_info.items() :
                    allele_info[k] = v
                
        return allele_info
    
    def identify_label(self, allele, method) :
        # Use convert methods to deduce the allele's label
        converted_allele = self.convert_iedblabel_to_methodlabel(allele, method=method)
        if converted_allele:
          return "iedb_label"
        
        converted_allele = self.convert_methodlabel_to_iedblabel(allele, method=method)
        if converted_allele:
          return "tool_label"
        
        converted_allele = self.convert_synonym_to_iedblabel(allele)
        if converted_allele:
          return "synonym"
        
        return "Unknown"