#!/usr/bin/env python
from __future__ import print_function
import os
import sys
import re
import json
import shutil
import tempfile
import logging
import string
import random
from os.path import isfile
from urllib.request import urlopen
from shutil import copyfileobj
from optparse import OptionParser
from functools import reduce
from collections import namedtuple, OrderedDict

# Configure the root logger: WARNING and above, each record prefixed with a
# timestamp, the level name and the emitting file/line.
logging.basicConfig(level=logging.WARNING, format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', datefmt='%Y-%m-%d:%H:%M:%S',)

# Extend the import search path with this script's own directory and with every
# sub-directory of ../method (resolved relative to the real location of this
# file, so symlinked invocations still work).
# NOTE(review): presumably this is what makes the project-local imports further
# down (split, aggregation, sequences, predictions) resolvable -- confirm.
# os.listdir() raises if ../method does not exist, so the script cannot even be
# imported without that directory.
script_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(script_dir )
methods_dir = os.path.join(script_dir, '../method')
                          
for method_dir_name in os.listdir(methods_dir):
    method_base_dir = os.path.join(methods_dir, method_dir_name)
    # Only directories are added; plain files inside ../method are ignored.
    if os.path.isdir(method_base_dir):
        sys.path.append(method_base_dir)

# TODO: determine which functions from the utils are more generally applicable than
#       just for this standalone and pull out into a 'common' utils library.

#from allele_validator import Allele_Validator
#from validations import pepmatch_validate
from split import split_parameters_file
from aggregation import aggregate_result_file
from sequences import Proteins
from predictions import run_pepmatch


def generate_random_str(length):
    """Return a random string of ``length`` ASCII digits and letters.

    Characters are drawn *with* replacement, so any non-negative length is
    supported. (The previous implementation used ``random.sample``, which
    draws without replacement and therefore raised ``ValueError`` for any
    length above the 62-character alphabet size.)

    Uses the ``random`` module, so the result is not suitable for
    security-sensitive tokens.

    Args:
        length: number of characters to generate.

    Returns:
        A string of exactly ``length`` characters.

    Raises:
        ValueError: if ``length`` is negative.
    """
    if length < 0:
        # Keep rejecting negative lengths, as random.sample did.
        raise ValueError('length must be non-negative')
    alphabet = string.digits + string.ascii_letters
    return ''.join(random.choices(alphabet, k=length))

def eprint(*args, **kwargs):
    """Print to standard error; accepts the same arguments as ``print``.

    ``sys.stderr`` is looked up at call time, so a redirected stderr is
    honoured.
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)

def read_peptides(fname):
    """Read a text file with one peptide per line.

    Surrounding whitespace is stripped from every line and lines that are
    empty after stripping are skipped.

    Args:
        fname: path of the file to read.

    Returns:
        List of the non-empty, stripped lines in file order.
    """
    with open(fname, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        return [peptide for peptide in stripped_lines if peptide]

def group_peptides_by_length(peptide_list):
    """Partition peptides into groups of equal length.

    The peptides are bucketed in a single pass instead of re-scanning the
    whole list once per distinct length. Within each group the peptides keep
    their order from ``peptide_list``.

    The order of the groups follows the iteration order of the set of
    lengths (unchanged from the previous implementation), which is not
    guaranteed to be sorted.

    Args:
        peptide_list: a re-iterable sequence (e.g. a list) of peptide strings.

    Returns:
        A list of lists, one inner list per distinct peptide length.
    """
    by_length = {}
    for pep in peptide_list:
        by_length.setdefault(len(pep), []).append(pep)
    # Build the set exactly as before so the group order is unchanged.
    return [by_length[length] for length in set(map(len, peptide_list))]

class Prediction:
    """Command-line front end of the standalone script.

    ``main`` parses the command line and dispatches to one of the supported
    actions: print help, aggregate results, split a parameter file, or run a
    prediction from a JSON parameter file.
    """

    @staticmethod
    def commandline_help():
        """Print the contents of ``example_commands.txt`` to stdout.

        The file is looked up in the directory that contains this script.
        """
        # Use a context manager so the file handle is always closed.
        with open(os.path.join(script_dir, 'example_commands.txt'), 'r') as f:
            print(f.read())

    def main(self):
        """Parse command-line options and run the requested action.

        Exit status:
            0 after help, aggregation or splitting, and also when no JSON
              file was given (help is printed in that case);
            1 when any ``Exception`` is raised; its message is written to
              stderr.
        A prediction run that completes without raising falls through and
        returns normally.
        """
        try:
            # The built-in -h/--help is disabled so that help prints the
            # example commands file instead of optparse's generated text.
            parser = OptionParser(add_help_option=False)

            parser.add_option("-h", "--help",
                              action="store_true",
                              dest="help",
                              default=False,
                              help="print available commands.")

            parser.add_option("--split",
                              action="store_true",
                              dest="split_parameters_flag",
                              default=False,
                              help="flag to indicate the action we want to take with the standalone: split parameters into JSON files")
            parser.add_option("--split-dir",
                              dest="split_parameters_dir",
                              default='',
                              help="the diretory for the JSON files that input parameters splitted into")
            parser.add_option("--split-inputs-dir",
                              dest="split_inputs_dir",
                              default=None,
                              help="the diretory for the sequence and peptide files that input sequences splitted into")

            parser.add_option("--aggregate",
                              action="store_true",
                              dest="aggregate_parameters_flag",
                              default=False,
                              help="flag to indicate the action to aggregate the results")
            parser.add_option("--job-desc-file",
                              dest="job_desc_file",
                              default='',
                              help="the file path for the job description")
            parser.add_option("--aggregate-input-dir",
                              dest="aggregate_input_dir",
                              default='',
                              help="the diretory for the JSON files which have input parameters")
            parser.add_option("--aggregate-result-dir",
                              dest="aggregate_result_dir",
                              default='',
                              help="the diretory for the JSON files contains results need to be aggregated as well as the place we place the final result file")
            parser.add_option("--assume-valid",
                              action="store_true",
                              dest="assume_valid_flag",
                              default=False,
                              help="flag to indicate skiping validation")

            parser.add_option('--proteomes-path',
                              dest="proteomes_path",
                              help="The path of pepmatch proteome files",
                              default=os.environ.get('PEPMATCH_PROTEOMES_PATH', ''))

            parser.add_option("-j", dest="json_filename",
                              help="FILE containing all parameters.", metavar="JSON_FILE")

            parser.add_option("--output-prefix", "-o", dest="output_prefix",
                              help="prediction result output path and prefix.", metavar="OUTPUT_PREFIX")

            parser.add_option("--output-format", "-f", dest="output_format", default="tsv",
                              help="prediction result output format.", metavar="OUTPUT_FORMAT")

            (options, args) = parser.parse_args()

            # sys.exit() is used instead of the exit() builtin, which is only
            # injected by the ``site`` module and is absent under ``python -S``.
            # SystemExit is not an Exception subclass, so these exits are not
            # intercepted by the handler below.
            if options.help:
                self.commandline_help()
                sys.exit(0)

            # Aggregation does not need a JSON parameter file, so it is
            # handled before the -j check.
            if options.aggregate_parameters_flag:
                aggregate_result_file(options.job_desc_file, options.aggregate_input_dir, options.aggregate_result_dir)
                sys.exit(0)

            if not options.json_filename:
                self.commandline_help()
                sys.exit(0)

            if options.split_parameters_flag:
                split_parameters_file(options.json_filename, options.split_parameters_dir, options.split_inputs_dir, executable_file="match.py", assume_valid=options.assume_valid_flag)
                sys.exit(0)

            # When stdin is not a terminal (piped or redirected), its first
            # line is appended to the positional arguments.
            if not sys.stdin.isatty():
                stdin = sys.stdin.readline().strip()
                args.append(stdin)

            # Drop empty positional arguments (e.g. an empty stdin line).
            args = list(filter(None, args))
            commandline_input_prediction(options, args)

        except Exception as e:
            print(str(e), file=sys.stderr)
            sys.exit(1)

def truncate_file(output_path):
    """Create ``output_path`` as an empty file, discarding existing content.

    Missing parent directories are created first, matching ``save_json``;
    previously a non-existent output directory raised ``FileNotFoundError``
    for TSV output while JSON output succeeded.

    Args:
        output_path: path of the file to create or empty.
    """
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # Opening in 'w' mode creates the file or truncates it to zero length.
    with open(output_path, 'w'):
        pass

def print_result(result):
    """Print each row of ``result`` to stdout as one tab-separated line.

    Args:
        result: iterable of rows; every cell is converted with ``str``.
    """
    for fields in result:
        line = '\t'.join(str(cell) for cell in fields)
        print(line)

def save_tsv(result, output_path):
    """Append ``result`` to ``output_path`` as tab-separated lines.

    Every cell is converted with ``str``; rows are joined with newlines and a
    trailing newline is written. The file is opened in append mode, so
    existing content is kept.

    Missing parent directories are created first, matching ``save_json``;
    previously a non-existent output directory raised ``FileNotFoundError``.

    Args:
        result: iterable of rows (each an iterable of cells).
        output_path: path of the TSV file to append to.
    """
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'a') as tsv_file:
        tsv_file.write('\n'.join('\t'.join(map(str, row)) for row in result))
        tsv_file.write('\n')

def save_json(result, output_path):
    """Write ``result`` to ``output_path`` as JSON indented by two spaces.

    The parent directory of ``output_path`` is created when it is missing.
    An existing file is overwritten.

    Args:
        result: JSON-serialisable object.
        output_path: destination file path.
    """
    parent_dir, _ = os.path.split(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w') as sink:
        json.dump(result, sink, indent=2)

def get_sequence_list(input_sequence_text):
    """Parse sequence text into a list of ``{name, sequence}`` dicts.

    The text is handed to ``Proteins``; its ``names`` and ``sequences``
    attributes are paired up positionally.

    Args:
        input_sequence_text: raw sequence text accepted by ``Proteins``.

    Returns:
        List of dicts with the keys ``'name'`` and ``'sequence'``.
    """
    parsed = Proteins(input_sequence_text)
    return [
        {'name': label, 'sequence': residues}
        for label, residues in zip(parsed.names, parsed.sequences)
    ]

def get_peptide_list(input_sequence_text):
    """Return the ``sequences`` that ``Proteins`` extracts from the given text.

    Args:
        input_sequence_text: raw sequence text accepted by ``Proteins``.
    """
    return Proteins(input_sequence_text).sequences

def transfer_fasta_to_peptide_file(input_sequence_text_file_path, input_length=''):
    """Write the peptides derived from a sequence file to a temporary file.

    The input file is parsed with ``get_sequence_list``. With an empty
    ``input_length`` every sequence is written as-is; otherwise, for each
    requested length, every window of that length is cut from every sequence.

    The temporary file is created with ``delete=False``; removing it is left
    to the caller.

    Args:
        input_sequence_text_file_path: path of the sequence text file.
        input_length: comma-separated window lengths (e.g. ``'8,9'``), or an
            empty string to keep the sequences unchanged.

    Returns:
        Path of the temporary file holding one peptide per line.
    """
    with open(input_sequence_text_file_path, 'r') as source:
        sequence_text = source.read()

    entries = get_sequence_list(sequence_text)

    if input_length:
        window_sizes = [int(token) for token in input_length.split(',')]
        peptides = []
        for size in window_sizes:
            for entry in entries:
                residues = entry['sequence']
                # A sequence shorter than the window yields an empty range.
                peptides.extend(
                    residues[start:start + size]
                    for start in range(len(residues) - size + 1)
                )
    else:
        # No lengths requested: use the sequences as they are.
        peptides = [entry['sequence'] for entry in entries]

    with tempfile.NamedTemporaryFile(mode='w', delete=False) as sink:
        sink.write('\n'.join(peptides))
        return sink.name

def get_peptides_with_diff_length(peptide_list):
    """Group peptides by length.

    Groups appear in the order in which each length is first seen, and the
    peptides inside a group keep their input order.

    Args:
        peptide_list: iterable of peptide strings.

    Returns:
        A list of lists, one per distinct peptide length.
    """
    buckets = {}
    for peptide in peptide_list:
        size = len(peptide)
        if size not in buckets:
            buckets[size] = []
        buckets[size].append(peptide)
    return list(buckets.values())


def save_and_get_input_sequence_text_file_path(input_sequence_fasta_uri, split_inputs_dir=None):
    """Download the content behind a URI into a new temporary file.

    The temporary file is created with ``delete=False``; removing it is left
    to the caller.

    Args:
        input_sequence_fasta_uri: URI to open with ``urlopen``.
        split_inputs_dir: directory for the temporary file; ``None`` uses the
            ``tempfile`` default location.

    Returns:
        Path of the temporary file holding the downloaded bytes.
    """
    with urlopen(input_sequence_fasta_uri) as response:
        with tempfile.NamedTemporaryFile(dir=split_inputs_dir, delete=False) as local_copy:
            copyfileobj(response, local_copy)
            saved_path = local_copy.name
    return saved_path

def read_json_file(file_path):
    """Load and return the JSON document stored at ``file_path``."""
    with open(file_path, 'r') as handle:
        parsed = json.loads(handle.read())
    return parsed


def _coerce_bool(value):
    """Interpret a JSON flag: the string 'false' (any case) is False, else ``bool(value)``."""
    if isinstance(value, str) and value.lower() == 'false':
        return False
    return bool(value)


def _load_peptide_list(input_data):
    """Return the peptides described by the first recognised input key in ``input_data``.

    Keys are checked in this order: ``input_sequence_text_file_path``,
    ``input_sequence_fasta_uri``, ``input_sequence_text``,
    ``peptide_file_path``, ``peptide_list``.

    Raises:
        ValueError: if none of those keys is present.
    """
    if 'input_sequence_text_file_path' in input_data:
        with open(input_data['input_sequence_text_file_path'], 'r') as r_file:
            return get_peptide_list(r_file.read())

    if 'input_sequence_fasta_uri' in input_data:
        # This branch used to download the file but never built a peptide
        # list, so the run always died with UnboundLocalError. The download
        # is now parsed the same way as the other sequence-text inputs.
        # NOTE(review): 'peptide_length_range' was read here but never used;
        # it is still not applied -- confirm whether the sequences should be
        # cut into peptides (cf. transfer_fasta_to_peptide_file).
        fasta_path = save_and_get_input_sequence_text_file_path(input_data['input_sequence_fasta_uri'])
        try:
            with open(fasta_path, 'r') as r_file:
                return get_peptide_list(r_file.read())
        finally:
            # The download is a delete=False temp file; do not leave it behind.
            os.remove(fasta_path)

    if 'input_sequence_text' in input_data:
        return get_peptide_list(input_data['input_sequence_text'])

    if 'peptide_file_path' in input_data:
        return read_peptides(input_data['peptide_file_path'])

    if 'peptide_list' in input_data:
        return input_data.get('peptide_list')

    raise ValueError(
        'No peptide input found in the JSON file: expected one of '
        '"input_sequence_text_file_path", "input_sequence_fasta_uri", '
        '"input_sequence_text", "peptide_file_path" or "peptide_list".')


def commandline_input_prediction(options, args):
    """Run pepmatch for the parameters in ``options.json_filename`` and emit the result.

    Output, depending on ``options.output_format`` and ``options.output_prefix``:
      * tsv with a prefix:     header and data rows written to ``<prefix>.tsv``
      * tsv without a prefix:  header and data rows printed to stdout
      * json with a prefix:    ``{"warnings": [...], "results": [result]}``
                               written to ``<prefix>.json``; the column names
                               are prefixed with ``pepmatch.``
      * json without a prefix: the raw result dict printed to stdout

    Args:
        options: parsed command-line options; reads ``output_prefix``,
            ``output_format``, ``json_filename`` and ``proteomes_path``.
        args: positional command-line arguments (currently unused).

    Returns:
        ``None`` normally, or ``{"errors": [...]}`` if input validation
        reported errors (validation is currently disabled).

    Raises:
        ValueError: if the JSON file contains no recognised peptide input.
    """
    output_prefix = options.output_prefix
    requested_format = options.output_format
    output_format = requested_format.lower()

    if output_format not in ('tsv', 'json'):
        eprint('The output format options are "tsv" or "json". Invalid format "%s" is given.' % requested_format)
        return

    if not options.json_filename:
        # Nothing to do without a parameter file.
        return

    with open(options.json_filename, 'r') as r_file:
        input_data = json.load(r_file)

    # Input validation (pepmatch_validate) is currently disabled, so both
    # lists start empty; the error check is kept for when it is re-enabled.
    errors = []
    warnings = []
    if errors:
        return {"errors": errors}

    peptide_list = _load_peptide_list(input_data)
    proteome = input_data.get('proteome', 'human')
    mismatch = input_data.get('mismatch', 1)
    best_match = _coerce_bool(input_data.get('best_match', False))
    include_unmatched_peptides = _coerce_bool(input_data.get('include_unmatched_peptides', False))

    result = run_pepmatch(peptide_list, proteome, mismatch, best_match, options.proteomes_path, include_unmatched_peptides)

    if output_format == 'tsv':
        table_rows = [result['table_columns']] + result['table_data']
        if output_prefix:
            output_tsv = output_prefix + '.tsv'
            # save_tsv appends, so clear any previous output first.
            truncate_file(output_tsv)
            save_tsv(table_rows, output_tsv)
        else:
            print_result(table_rows)
    elif output_prefix:
        result['table_columns'] = ["pepmatch." + column_name for column_name in result['table_columns']]
        # Warnings reported by the run are moved to the top level.
        if "warnings" in result:
            warnings.extend(result.pop('warnings'))
        save_json({'warnings': warnings, 'results': [result]}, output_prefix + '.json')
    else:
        print(json.dumps(result, indent=2))


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    Prediction().main()


