import json
import os
import re
import subprocess
import sys
import unittest


def find_root_dir(start_dir=None, anchor_files=None):
    """Find the app root directory by looking for known anchor files.

    The search starts at ``start_dir`` and climbs towards the filesystem
    root. At every level the directory itself is checked first; if it holds
    no anchor, every directory beneath it is scanned with ``os.walk`` before
    moving up to the parent.

    Args:
        start_dir: Directory to start from. Defaults to the current working
            directory.
        anchor_files: Names of files or directories that mark the root.
            Defaults to ``['LICENSE']``.

    Returns:
        The absolute path of the first directory found to contain an anchor.
        This is either ``start_dir``/one of its ancestors, or a directory
        located somewhere below one of the levels visited. ``None`` is
        returned when no anchor is found.
    """
    if start_dir is None:
        start_dir = os.getcwd()  # Default to the current working directory

    if anchor_files is None:
        anchor_files = ['LICENSE']

    def holds_anchor(directory):
        # An anchor may be either a regular file or a directory.
        for anchor in anchor_files:
            candidate = os.path.join(directory, anchor)
            if os.path.isfile(candidate) or os.path.isdir(candidate):
                return True
        return False

    # Normalize to absolute path
    current_dir = os.path.abspath(start_dir)

    # Name of the child whose subtree was fully scanned on the previous level;
    # it cannot contain an anchor, so it is skipped when walking the parent.
    scanned_child = None

    while True:
        if holds_anchor(current_dir):
            return current_dir

        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # Filesystem root: only the direct check above is performed, the
            # whole filesystem is deliberately not walked.
            return None

        for root, dirs, files in os.walk(current_dir):
            if any(anchor in dirs or anchor in files for anchor in anchor_files):
                return root  # Directory below current_dir that holds the anchor
            if scanned_child is not None and root == current_dir:
                # Prune in place (top-down walk) so os.walk does not descend
                # again into the subtree that was already searched.
                dirs[:] = [name for name in dirs if name != scanned_child]

        scanned_child = os.path.basename(current_dir)
        current_dir = parent_dir  # Move up one level

class TestCluster(unittest.TestCase):
    """End-to-end tests that run ``src/run_cluster.py`` as a subprocess.

    All paths are derived from the application root, located by searching for
    the ``license-LJI.txt`` anchor file from the current working directory.
    """

    APP_ROOT_DIR = find_root_dir(anchor_files=['license-LJI.txt'])
    if APP_ROOT_DIR is None:
        # Fail with an actionable message rather than the TypeError that
        # os.path.join(None, ...) would raise below.
        raise FileNotFoundError(
            "Could not locate the application root: 'license-LJI.txt' was "
            "not found from the current working directory."
        )
    CLUSTER_EXEC = os.path.join(APP_ROOT_DIR, 'src', 'run_cluster.py')
    EXAMPLE_DIR = os.path.join(APP_ROOT_DIR, 'examples')
    TEST_DATA_DIR = os.path.join(APP_ROOT_DIR, 'test_data')

    def _run(self, cmd):
        """Run ``cmd`` and return its stdout, failing the test on a non-zero exit.

        Checking the exit status keeps a crashed run from being masked by an
        output file left over from an earlier run.
        """
        proc = subprocess.run(cmd, capture_output=True, text=True)
        self.assertEqual(
            proc.returncode, 0,
            f'command failed: {cmd}\nstderr:\n{proc.stderr}',
        )
        return proc.stdout

    @staticmethod
    def _read_without_spaces(path):
        """Return the content of ``path`` with every space character removed."""
        with open(path, 'r') as f:
            return f.read().replace(' ', '')

    def _run_job_and_compare(self, job, expected_file):
        """Run one entry of the job description and compare its first output.

        ``job`` is a mapping with a ``shell_cmd`` string and an
        ``expected_outputs`` list; only the first output path is compared
        (ignoring spaces) against ``expected_file``.
        """
        # The command is split on single spaces, so it is assumed to contain
        # no arguments with embedded spaces.
        job_cmd = [sys.executable] + job['shell_cmd'].split(' ')
        self._run(job_cmd)

        # This particular example has only one output path
        job_result_file = job['expected_outputs'][0]
        self.assertEqual(
            self._read_without_spaces(job_result_file),
            self._read_without_spaces(expected_file),
        )

    def test_basic_command(self):
        """The default (tabular) output for the example input matches the reference."""
        cmd = [
            sys.executable, self.CLUSTER_EXEC,
            '-j', os.path.join(self.EXAMPLE_DIR, 'cluster.json'),
        ]

        # Replace all tabs to spaces
        result = self._run(cmd).strip().replace('\t', ' ')

        expected_result = '''
cluster_number  peptide_number  alignment       position        sequence_number peptide cluster_consensus
1.1     Singleton       LEQIHVLENSLVL   -       Mus Pep1        LEQIHVLENSLVL   LEQIHVLENSLVL
2.1     Singleton       FVEHIHVLENSLAFK -       Mus Pep2        FVEHIHVLENSLAFK FVEHIHVLENSLAFK
3.1     Singleton       GLYGREPDLSSDIKERFA      -       Mus Pep3        GLYGREPDLSSDIKERFA      GLYGREPDLSSDIKERFA
4.1     Singleton       EWFSILLASDKREKI -       Mus Pep4        EWFSILLASDKREKI EWFSILLASDKREKI'''

        # Collapse every run of spaces/tabs to a single space (keep the newlines)
        expected_result = expected_result.strip()
        expected_result = re.sub(r'[ \t]+', ' ', expected_result)

        # Comparison
        self.assertEqual(result, expected_result)

    def test_json_format(self):
        """The ``-f json`` output file equals the stored expected JSON."""
        output_dir = os.path.join(self.TEST_DATA_DIR, 'cluster_break')
        cmd = [
            sys.executable, self.CLUSTER_EXEC,
            '-j', os.path.join(self.EXAMPLE_DIR, 'cluster.json'),
            '-o', os.path.join(output_dir, 'output'),
            '-f', 'json',
        ]

        # Run the process
        self._run(cmd)

        with open(os.path.join(output_dir, 'output.json'), 'r') as f:
            result = json.load(f)

        with open(os.path.join(output_dir, 'expected_output.json'), 'r') as f:
            expected_output = json.load(f)

        self.assertEqual(result, expected_output)

    def test_split_aggregate(self):
        """Split the example into jobs, then run the first job and the aggregation job."""
        cmd = [
            sys.executable, self.CLUSTER_EXEC,
            '-j', os.path.join(self.EXAMPLE_DIR, 'cluster.json'),
            '--split',
            '--split-dir=' + os.path.join(self.EXAMPLE_DIR, 'job', 'parameter_units'),
        ]

        descr_lines = [line for line in self._run(cmd).split('\n') if line]
        self.assertTrue(descr_lines, 'the split command printed nothing to stdout')

        # The last non-empty stdout line is parsed as '<label>: <path>'. Split
        # on the first colon only, so a path that itself contains ':' is kept
        # intact.
        job_description_path = descr_lines[-1].split(':', 1)[1].strip()

        with open(job_description_path, 'r') as f:
            content = json.load(f)
        first_content = content[0]
        second_content = content[1]

        # Testing the first job ID
        self._run_job_and_compare(
            first_content,
            os.path.join(self.APP_ROOT_DIR, 'tests', 'expected_0.json'),
        )

        # Testing the second job ID - aggregation
        self._run_job_and_compare(
            second_content,
            os.path.join(self.APP_ROOT_DIR, 'tests', 'expected_aggregated_result.json'),
        )


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()