import edlib

def find_indels_with_edlib(seq1, seq2):
    """
    Classify every column of a global edlib alignment of two sequences.

    seq1 is passed to edlib.align() as the query and seq2 as the target,
    using mode='NW' and task='path'. The gapped alignment produced by
    edlib.getNiceAlignment() is then walked column by column, and each
    column is classified as a match, mismatch, insertion (gap in seq1) or
    deletion (gap in seq2). Adjacent columns of the same kind are merged
    into one run; a new run starts whenever the kind of column changes, so
    events separated by other columns are reported as separate entries.

    Args:
        seq1: First sequence (the edlib query).
        seq2: Second sequence (the edlib target).

    Returns:
        dict with the following keys (all positions are 0-based offsets
        into the original, ungapped sequences):
            'insertions': list of [pos1, pos2, length, inserted], where
                pos2 is where the run starts in seq2, pos1 is the seq1
                offset at which it occurs and inserted is the seq2 text.
            'deletions': list of [pos1, pos2, length, deleted], where
                pos1 is where the run starts in seq1, pos2 is the seq2
                offset at which it occurs and deleted is the seq1 text.
            'matches': list of [pos1, pos2, length].
            'mismatches': list of [pos1, pos2, length, text1, text2] with
                the differing seq1 and seq2 text of the run.
            'aligned_seq1', 'aligned_seq2': the gapped alignment strings.
    """
    # Align sequences with edlib and render the path as gapped strings.
    result = edlib.align(seq1, seq2, mode='NW', task="path")
    pretty_result = edlib.getNiceAlignment(result, seq1, seq2)
    aligned_seq1 = pretty_result['query_aligned']
    aligned_seq2 = pretty_result['target_aligned']

    insertions = []
    deletions = []
    matches = []
    mismatches = []

    pos1 = 0  # Position in original seq1
    pos2 = 0  # Position in original seq2
    # Kind of the previous column. A run may only be extended when the
    # current column has the same kind; otherwise the last entry of that
    # list belongs to an earlier, non-adjacent event and must not grow.
    previous_kind = None

    for char1, char2 in zip(aligned_seq1, aligned_seq2):
        # NOTE: '-' is treated as the gap symbol, so a literal '-' inside an
        # input sequence is indistinguishable from a gap here.
        if char1 == char2 and char1 != '-':
            kind = 'match'
        elif char1 == '-':
            kind = 'insertion'
        elif char2 == '-':
            kind = 'deletion'
        else:
            kind = 'mismatch'

        extends_run = kind == previous_kind

        if kind == 'match':
            if extends_run:
                matches[-1][2] += 1
            else:
                matches.append([pos1, pos2, 1])
            pos1 += 1
            pos2 += 1
        elif kind == 'insertion':
            # Gap in seq1: only seq2 advances.
            if extends_run:
                insertions[-1][2] += 1
                insertions[-1][3] += char2
            else:
                insertions.append([pos1, pos2, 1, char2])
            pos2 += 1
        elif kind == 'deletion':
            # Gap in seq2: only seq1 advances.
            if extends_run:
                deletions[-1][2] += 1
                deletions[-1][3] += char1
            else:
                deletions.append([pos1, pos2, 1, char1])
            pos1 += 1
        else:
            if extends_run:
                mismatches[-1][2] += 1
                mismatches[-1][3] += char1
                mismatches[-1][4] += char2
            else:
                mismatches.append([pos1, pos2, 1, char1, char2])
            pos1 += 1
            pos2 += 1

        previous_kind = kind

    return {
        'insertions': insertions,
        'deletions': deletions,
        'matches': matches,
        'mismatches': mismatches,
        'aligned_seq1': aligned_seq1,
        'aligned_seq2': aligned_seq2
    }

# Example usage
if __name__ == "__main__":
    # The commented-out walkthrough below is kept as reference material for
    # the individual indel scenarios; only the short demo at the end runs.

    # print("="*60)
    # print("FINDING INDELS WITH EDLIB.ALIGN()")
    # print("="*60)
    
    # # Example 1: Insertion
    # print("\n1. INSERTION EXAMPLE:")
    # seq1 = "VVLSWAPPV"
    # seq2 = "VVLSWAPPPV"  # Has insertion of 'P'
    
    # print(f"Sequence 1: {seq1}")
    # print(f"Sequence 2: {seq2}")
    
    # indel_info = find_indels_with_edlib(seq1, seq2)
    
    # print(f"Aligned sequences:")
    # print(f"Seq1: {indel_info['aligned_seq1']}")
    # print(f"Seq2: {indel_info['aligned_seq2']}")
    
    # print(f"Insertions found: {len(indel_info['insertions'])}")
    # for start1, start2, length, inserted_seq in indel_info['insertions']:
    #     print(f"  Insertion: seq2[{start2}:{start2+length}] = '{inserted_seq}'")
    
    # # Example 2: Deletion
    # print("\n2. DELETION EXAMPLE:")
    # seq1_del = "VVLSWAPPV"
    # seq2_del = "VVLSWAPV"   # 'P' deleted
    
    # print(f"Sequence 1: {seq1_del}")
    # print(f"Sequence 2: {seq2_del}")
    
    # indel_info_del = find_indels_with_edlib(seq1_del, seq2_del)
    
    # print(f"Aligned sequences:")
    # print(f"Seq1: {indel_info_del['aligned_seq1']}")
    # print(f"Seq2: {indel_info_del['aligned_seq2']}")
    
    # print(f"Deletions found: {len(indel_info_del['deletions'])}")
    # for start1, start2, length, deleted_seq in indel_info_del['deletions']:
    #     print(f"  Deletion: seq1[{start1}:{start1+length}] = '{deleted_seq}'")
    
    # # Example 3: Both insertion and deletion
    # print("\n3. BOTH INSERTION AND DELETION EXAMPLE:")
    # seq1_both = "VVLSWAPPV"
    # seq2_both = "VVLSWAPQV"   # 'P' replaced by 'Q': a substitution (mismatch), not an indel
    
    # print(f"Sequence 1: {seq1_both}")
    # print(f"Sequence 2: {seq2_both}")
    
    # indel_info_both = find_indels_with_edlib(seq1_both, seq2_both)
    
    # print(f"Aligned sequences:")
    # print(f"Seq1: {indel_info_both['aligned_seq1']}")
    # print(f"Seq2: {indel_info_both['aligned_seq2']}")
    
    # print(f"Insertions found: {len(indel_info_both['insertions'])}")
    # for start1, start2, length, inserted_seq in indel_info_both['insertions']:
    #     print(f"  Insertion: seq2[{start2}:{start2+length}] = '{inserted_seq}'")
    
    # print(f"Deletions found: {len(indel_info_both['deletions'])}")
    # for start1, start2, length, deleted_seq in indel_info_both['deletions']:
    #     print(f"  Deletion: seq1[{start1}:{start1+length}] = '{deleted_seq}'")
    
    # # Example 4: Actual insertion + deletion
    # print("\n4. ACTUAL INSERTION + DELETION EXAMPLE:")
    # seq1_actual = "VVLSWAPPV"
    # seq2_actual = "VVLSWAPVQ"   # 'P' deleted, 'Q' added at end
    
    # print(f"Sequence 1: {seq1_actual}")
    # print(f"Sequence 2: {seq2_actual}")
    
    # indel_info_actual = find_indels_with_edlib(seq1_actual, seq2_actual)
    
    # print(f"Aligned sequences:")
    # print(f"Seq1: {indel_info_actual['aligned_seq1']}")
    # print(f"Seq2: {indel_info_actual['aligned_seq2']}")
    
    # print(f"Insertions found: {len(indel_info_actual['insertions'])}")
    # for start1, start2, length, inserted_seq in indel_info_actual['insertions']:
    #     print(f"  Insertion: seq2[{start2}:{start2+length}] = '{inserted_seq}'")
    
    # print(f"Deletions found: {len(indel_info_actual['deletions'])}")
    # for start1, start2, length, deleted_seq in indel_info_actual['deletions']:
    #     print(f"  Deletion: seq1[{start1}:{start1+length}] = '{deleted_seq}'")
    
    # print(f"\n" + "="*60)
    # print("KEY POINTS FOR FINDING INDELS WITH EDLIB:")
    # print("="*60)
    # print("1. Use task='path' in edlib.align()")
    # print("2. Use getNiceAlignment() to get sequences with gaps")
    # print("3. Look for '-' characters which indicate gaps")
    # print("4. Insertion: gap in seq1 (seq2 has extra characters)")
    # print("5. Deletion: gap in seq2 (seq1 has extra characters)")
    # print("6. The function returns positions, lengths, and actual sequences")
    # print("7. Note: Substitutions (like P->Q) are mismatches, not indels")
    # print("8. Indels require actual gaps in the alignment")

    # Demo pair: equal-length sequences that differ at index 2 (L vs M)
    # and at index 7 (P vs R).
    demo_pair = ("VVLSWAPPV", "VVMSWAPRV")

    # Print the full result dictionary for the demo pair.
    print(find_indels_with_edlib(*demo_pair))