#!/usr/bin/python

import mmbot as mmb
import argparse
import sys
import os
import ujson as json
import time
from pprint import pprint
import logging


def moveFile(filepath):
    """Flag a sample as processed by renaming it with a ".done" suffix.

    This is a best-effort operation: if building the new name or performing
    the rename fails for any reason, the failure is logged and the function
    returns normally, so one bad file does not abort a batch.

    :param filepath: path of the file to rename.
    :return: None
    """
    try:
        # The concatenation is kept inside the try on purpose so that a
        # non-string path is logged instead of raising.
        os.rename(filepath, filepath + ".done")
    except Exception as err:
        message = "Error: {}".format(str(err))
        logging.error(message)


def _default_output_dir(input_path):
    """Return the directory results are written to when -o is not supplied.

    A directory input is used as-is.  For a single sample file the results go
    next to it, in the file's parent directory (joining a file name onto a
    file path would not yield a usable location).
    """
    if os.path.isdir(input_path):
        return input_path
    return os.path.dirname(os.path.abspath(input_path))


def _missing_paths(*paths):
    """Return the given paths that do not exist on disk, in argument order."""
    return [path for path in paths if not os.path.exists(path)]


if __name__ == '__main__':
    # Command line entry point.  Two mutually exclusive use cases:
    #   1) -i: classify the sample(s) at a path and write a json/csv report.
    #   2) -u: build a new model from known benign (-b) and malicious (-m)
    #          samples and save it at the given location.
    # If both -i and -u are supplied, -i wins (same as before).
    parser = argparse.ArgumentParser(description='''Malicious Macro Bot: Python 
        module to classify and cluster Microsoft office documents.  Uses 
        machine learning techniques to determine if VBA code is malicious or 
        benign and groups similar documents together.  There are two main 
        uses for this command line utility: 1) specifying input, output, and 
        format of samples to analyze. 2) updating your own model, requiring 
        the other command line options to all be set.''')

    parser.add_argument('-i', '--input', help='Path to sample or directory of samples to analyze.  Files will be renamed ".done" post analysis.')
    parser.add_argument('-o', '--output', help='(Optional) Path to store results from analyzing files.  Default will save in the same directory as the input.')
    parser.add_argument('-s','--model_saved_state', help='(Optional) Use the model at this location for the analysis.  Default is the model that came with the python package.')
    parser.add_argument('-f', '--output_format', help='(Optional) format for the output, either "json" or "csv".  Default is csv.')
    parser.add_argument('-u', '--update_model', help='Save a newly built model at this directory path, using the samples found in the path to known malicious_samples and known benign_samples.')
    parser.add_argument('-b', '--benign_samples', help='Path to known benign samples to be used as training data for a new model.  Required if -u option is used.')
    parser.add_argument('-m', '--malicious_samples', help='Path to known malicious samples to be used as training data for a new model.  Required if -u option is used.')
    args = parser.parse_args()

    # Use Case: Analyze samples on disk.
    if args.input:
        if not os.path.exists(args.input):
            logging.error("\nSupplied input file path did not exist.  Exiting.\n")
            parser.print_help()
            sys.exit(1)

        input_path = args.input

        if args.output:
            output_dir = args.output
            if not os.path.exists(output_dir):
                logging.warning("Output path {} did not exist, creating it and continuing with analysis.".format(args.output))
                os.makedirs(output_dir)
        else:
            output_dir = _default_output_dir(input_path)

        # Anything other than an explicit "json" is written as csv; an
        # unrecognized value is reported instead of being silently ignored.
        output_format = (args.output_format or 'csv').lower()
        if output_format not in ('json', 'csv'):
            logging.warning("Unrecognized output format {}, falling back to csv.".format(args.output_format))
            output_format = 'csv'

        if args.model_saved_state:
            if not os.path.exists(args.model_saved_state):
                logging.error("\nSupplied saved model state did not exist.  Exiting.\n")
                parser.print_help()
                sys.exit(1)
            mymacrobot = mmb.MaliciousMacroBot(benign_path=None,
                                               malicious_path=None,
                                               model_path=args.model_saved_state)
        else:
            mymacrobot = mmb.MaliciousMacroBot()

        mymacrobot.mmb_init_model(modelRebuild=False)

        # Files already renamed to ".done" by a previous run are skipped.
        results = mymacrobot.mmb_predict(input_path, datatype='filepath', exclude_files='.done')

        outputfilename = os.path.join(output_dir, time.strftime("%Y%m%d_%H%M%S_mmbot_results"))
        # Computed once: needed for the json report and for the console dump.
        dictresults = mymacrobot.mmb_prediction_to_json(results)

        if output_format == 'json':
            with open(outputfilename + '.json', 'w') as outfile:
                # Serialize exactly once.  Dumping an already-encoded string
                # would store a quoted JSON string instead of the document.
                # The default ASCII escaping keeps the write independent of
                # the platform's default file encoding.
                json.dump(dictresults, outfile)
        else:
            csvcolumns = ['filemodified', 'filename', 'filepath', 'filesize', 'md5',
               'filename_vba', 'stream_path', 'feat_1_cnt', 'feat_1_importance', 'feat_1_name', 'feat_2_cnt',
               'feat_2_importance', 'feat_2_name', 'feat_3_cnt', 'feat_3_importance', 'feat_3_name', 'feat_4_cnt',
               'feat_4_importance', 'feat_4_name', 'feat_5_cnt', 'feat_5_importance', 'feat_5_name', 'featureprint',
               'function_names', 'prediction', 'vba_avg_param_per_func', 'vba_cnt_comment_loc_ratio',
               'vba_cnt_comments', 'vba_cnt_func_loc_ratio', 'vba_cnt_functions', 'vba_cnt_loc', 'vba_entropy_chars',
               'vba_entropy_func_names', 'vba_entropy_words', 'vba_mean_loc_per_func']
            results[csvcolumns].to_csv(outputfilename + '.csv', index=False, header=True, encoding='utf-8')

        # Mark every analyzed file as processed only after the report is written.
        results.filepath.apply(moveFile)

        pprint(dictresults)

    # Use Case: Update model and save it at a specific location
    elif args.update_model:
        if not (args.benign_samples and args.malicious_samples):
            logging.error("\nWhen the -u option is specified, the -b and -m must both also be provided.  Exiting.\n")
            parser.print_help()
            sys.exit(1)

        # Both training sets are required, so a single missing path is fatal.
        missing = _missing_paths(args.benign_samples, args.malicious_samples)
        if missing:
            logging.error("\nPath(s) to known benign and/or malicious samples did not exist: {}.  Exiting.\n".format(", ".join(missing)))
            parser.print_help()
            sys.exit(1)

        if not os.path.exists(args.update_model):
            logging.warning("Path for new model {} did not exist, creating it.".format(args.update_model))
            os.makedirs(args.update_model)

        mymacrobot = mmb.MaliciousMacroBot(benign_path=args.benign_samples,
                                       malicious_path=args.malicious_samples,
                                       model_path=args.update_model)
        mymacrobot.mmb_init_model(modelRebuild=True)
    else:
        # Neither use case was requested: show usage and signal failure.
        parser.print_help()
        sys.exit(1)
