""" OpenCV for matching historical documents test code

    Author:     Alexander Persaud
                Department of Economics
                University of Richmond
                apersaud@richmond.edu

    Suggestions for users of this file:

        1. Identify a template image or multiple images.
            It is not necessary, though it's easier, to keep all template images in the same directory.
        2. Put all image files to search in one folder (directory).
            As with the templates, a single directory is not strictly necessary, but it is easier.
        3. Assign directory parameters for both types of image
        4. Change the loop structure as needed to reflect image types.

    Test comparison data available from the National Library of Australia
    https://nla.gov.au/nla.obj-2787566579/view?partId=nla.obj-2787578098#page/n0/mode/1up
    Ship:  "Hereford" from 1888

    Suggested additional readings:
        https://docs.opencv.org/master/dc/dc3/tutorial_py_matcher.html
        https://opencv24-python-tutorials.readthedocs.io/en/latest/py_tutorials/py_feature2d/py_matcher/py_matcher.html
"""

from datetime import datetime
startTime = datetime.now() # Start timing the duration of the script

import numpy as np #Numpy / statistical

import cv2 as cv # Computer vision (CV) library

import matplotlib.pyplot as plt # Plotting library

#import os # Required to change the working directory; not recommended.

import pathlib  # Better than working directory changes

import glob # Used for looping over images IF pathlib is not used

import csv


#to do
#   figure out what each command means
#   create a value for matches (or set of values) to quantify the quality of the match
#   create some kind of output file to hold match quality
#end to do

# --- File locations -------------------------------------------------------
# Point these at the user's own directories before running.

templateP = pathlib.Path('')    # Directory holding the template images (read as *.png)

searchP = pathlib.Path('')      # Directory holding the comparison/search images (read as *.jpg)

writeP = pathlib.Path('')       # Directory that receives the 'matches.csv' output

# --- Program-wide parameters ----------------------------------------------

# FLANN index settings: k-d tree algorithm (=1) built with 5 trees.
index_params = {'algorithm': 1, 'trees': 5}

# FLANN search setting: number of times to search.
search_params = {'checks': 50}

# Lowe ratio-test threshold; lower indicates a stricter match criterion.
# Lowe (2004) suggests ~0.8.  The CDF of true matches drops off between
# 0.7 and 0.8, and the CDF of false matches grows rapidly after 0.8.
match_dist = 0.8

# Crop divisors for the comparison images.  The kept region runs from the
# top-left corner to (height / height_crop, width / width_crop), so only the
# top 1/height_crop and left 1/width_crop of each image is searched.
# 1 = no crop and is the minimum sensible value.
height_crop = 1.55

width_crop = 1

# --- Output switches (1 = on, 0 = off) ------------------------------------

plot_match_img_yn = 0   # Plot the matched key points for every template/image pair

verbose_yn = 0  # Print more intermediate output.
                # This does NOT switch plotting on (see plot_match_img_yn),
                # and the extra output may slow overall processing.

# Implement

# Scale-Invariant Feature Transform (SIFT) detector.  Created once here and
# reused for the comparison images further down.

sift = cv.SIFT_create()

# Load every template once, in grayscale to reduce comparison issues, and
# cache its key points and descriptors so they are not recomputed for each
# comparison image.
#
# template_img, template_kp, template_des and header_csv[1:] are kept
# index-aligned: entry k of each one belongs to the same template file.
# A template is therefore either appended to all of them or to none.

template_img = []                                       # Grayscale template images
template_files = sorted(templateP.glob('*.png'))        # Sorted so the CSV column order is stable

header_csv = ['Image file']                             # First header cell; one cell per template follows

template_kp = []                                        # SIFT key points per template
template_des = []                                       # SIFT descriptors per template

for template_file in template_files:
    if verbose_yn == 1:
        print(template_file)

    img1 = cv.imread(str(template_file), cv.IMREAD_GRAYSCALE)
    if img1 is None:
        # cv.imread does not raise on a missing/corrupt file; it returns None.
        print("Skipping unreadable template image: ", template_file)
        continue

    kp1, des1 = sift.detectAndCompute(img1, None)       # No mask to pass, so None is specified
    if des1 is None:
        # No key points were found, so there is nothing to match against.
        print("Skipping template image with no SIFT features: ", template_file)
        continue

    template_img.append(img1)
    header_csv.append(pathlib.PurePath(template_file).stem)  # File name (no extension) as column title
    template_kp.append(kp1)
    template_des.append(des1)

    if verbose_yn == 1:
        print(header_csv)

# Match every comparison image against every cached template and export the
# results to 'matches.csv' in writeP.  Each row holds the comparison file
# name followed by one value per template: the share of template key points
# whose best match passes the ratio test (match_dist).

# Fast Library for Approximate Nearest Neighbors (FLANN) matcher.  Its
# parameters never change, so it is built once rather than per image pair.
flann = cv.FlannBasedMatcher(index_params, search_params)

with open(str(pathlib.PurePath(writeP, 'matches.csv')),
          'w', newline='') as csvfile:
    match_writer = csv.writer(csvfile)
    match_writer.writerow(header_csv)                       # Header row built while loading templates

    # Sorted (like the template files) so the row order is reproducible.
    comparison_files = sorted(searchP.glob('*.jpg'))
    for comparison_file in comparison_files:
        if verbose_yn == 1:
            print(comparison_file)

        # Comparison (search) image; loaded in grayscale for ease.
        img2_raw = cv.imread(str(comparison_file), cv.IMREAD_GRAYSCALE)
        if img2_raw is None:
            # cv.imread returns None instead of raising for unreadable files.
            print("Skipping unreadable comparison image: ", comparison_file)
            continue

        # Keep only the top-left part of the image per the crop parameters.
        img2 = img2_raw[0:int(img2_raw.shape[0]/height_crop),
                        0:int(img2_raw.shape[1]/width_crop)]

        match_line = [pathlib.PurePath(comparison_file).stem]  # Row starts with the file name

        kp2, des2 = sift.detectAndCompute(img2, None)       # No mask to pass, so None is specified

        for t_idx, img1 in enumerate(template_img):         # Loop over the templates cached previously
            kp1 = template_kp[t_idx]
            des1 = template_des[t_idx]

            # The ratio test needs the two nearest neighbours, so both
            # descriptor sets must exist and the comparison image must offer
            # at least two descriptors.  Otherwise record "no matches".
            if des1 is None or des2 is None or len(des2) < 2:
                match_line.append(0.0)
                continue

            matches = flann.knnMatch(des1,                  # template descriptors from SIFT
                                     des2,                  # comparison descriptors from SIFT
                                     k=2)                   # two best matches per template descriptor

            # One mask entry per template descriptor; [1, 0] marks that its
            # best match should be drawn.
            matchesMask = [[0, 0] for _ in matches]
            match_calib = len(matches)                      # Total number of candidate matches

            # Ratio test
            match_num = 0                                   # Number of matches accepted
            for m_idx, pair in enumerate(matches):
                if len(pair) < 2:                           # Cannot compare best vs. second best
                    continue
                m, n = pair
                if m.distance < match_dist*n.distance:      # match_dist set above
                    matchesMask[m_idx] = [1, 0]
                    match_num += 1

            if verbose_yn == 1:
                print("Actual/Total possible matches: ", match_num, "/", match_calib)

            # Rendering the match image is costly, so only do it on request.
            if plot_match_img_yn == 1:
                draw_params = dict(matchColor = (60,0,250),          # Colour of connector lines
                                   singlePointColor = (155,110,30),  # Colour of unmatched key points
                                   matchesMask = matchesMask,
                                   flags = cv.DrawMatchesFlags_DEFAULT)

                img3 = cv.drawMatchesKnn(img1, kp1,         # Information from template
                                         img2, kp2,         # Information from search image
                                         matches,
                                         None,              # No pre-allocated output image
                                         **draw_params)
                plt.imshow(img3)
                plt.show()

            # Share of accepted matches as the match quality measure.
            match_line.append(match_num/match_calib if match_calib else 0.0)

        # Export the row for this comparison image.
        match_writer.writerow(match_line)

print("Total time: ", datetime.now() - startTime)