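# NOTE:
# This script refreshes the 'Predictor Availability' column of
# data/mhc_alleles.tsv: the column is reset to 0 for every allele and then
# set to 1 for each allele whose 'MRO ID' appears in data/tools-mapping.tsv.
#
# NOTE(review): an earlier version of this header said the script removes
# all old netmhciipan alleles and adds the valid ones retrieved from the
# netmhciipan-4.1-alleles-name.txt, netmhciipan-4.2-alleles-name.txt, and
# netmhciipan-4.3-alleles-name.txt files. No such logic is present in this
# file -- confirm whether that step lives in a different script.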

import time
import sys
import re
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
from itertools import product
# Resolve the script location once so that every derived path is absolute.
# On interpreters where __main__.__file__ is relative (Python < 3.9), an
# unresolved Path(__file__).parent.parent collapses to "." when the script is
# launched from its own directory, which would point DATA_DIR at the wrong
# folder.
PARENT_DIR = Path(__file__).resolve().parent
PROJECT_DIR = str(PARENT_DIR.parent)
sys.path.insert(1, PROJECT_DIR)

# Locations of the allele data files (under <project root>/data).
DATA_DIR = PARENT_DIR.parent / "data"
TOOLS_MAPPING_FILE = DATA_DIR / "tools-mapping.tsv"
MHC_ALLELES_FILE = DATA_DIR / "mhc_alleles.tsv"



def update_predictor_availability(mhc_df, tools_df):
    """Recompute the 'Predictor Availability' column of *mhc_df* in place.

    Every row of *mhc_df* gets 1 when its 'MRO ID' occurs in the 'MRO ID'
    column of *tools_df*, and 0 otherwise. Any previous content of the
    'Predictor Availability' column is overwritten.

    Args:
        mhc_df: DataFrame with an 'MRO ID' column.
        tools_df: DataFrame with an 'MRO ID' column.

    Returns:
        The same *mhc_df* object, for convenience.

    Raises:
        KeyError: if either frame has no 'MRO ID' column.
    """
    # Missing IDs are dropped first: Series.isin would otherwise pair a NaN
    # in tools_df with a NaN in mhc_df, and a missing ID must never count
    # as a match.
    tool_mro_ids = tools_df['MRO ID'].dropna()

    # One vectorized membership test replaces a per-row scan of mhc_df.
    mhc_df['Predictor Availability'] = (
        mhc_df['MRO ID'].isin(tool_mro_ids).astype('int64')
    )
    return mhc_df


if __name__ == "__main__":

    # =========================================================
    # Update mhc_alleles.tsv Predictor Availability
    # =========================================================
    mhc_df = pd.read_csv(MHC_ALLELES_FILE, skipinitialspace=True, sep='\t')
    tools_df = pd.read_csv(TOOLS_MAPPING_FILE, skipinitialspace=True, sep='\t')

    # NOTE: all alleles in tools_df should be in mhc_df; tool alleles that
    # are absent from mhc_df are simply ignored.
    update_predictor_availability(mhc_df, tools_df)

    # Update the mhc_alleles file
    mhc_df.to_csv(MHC_ALLELES_FILE, sep='\t', index=False)