# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/pdf.ipynb.

# %% auto 0
__all__ = ['PdfClient']

# %% ../nbs/pdf.ipynb 3
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.lib.pagesizes import A4, landscape
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
import hashlib
import os
import json
import requests
from PIL import Image
from reportlab.lib.colors import *
from tqdm import tqdm
from reportlab.lib import colors
import random

# %% ../nbs/pdf.ipynb 4
class PdfClient:
    """Class for working with PDFs: renders IIIF manifests into PDF files."""

    tmp_dir = "tmp"

    def __init__(self):
        """
        コンストラクタ。一時フォルダを作成します。
        """
        os.makedirs(self.tmp_dir, exist_ok=True)

    def get_manifest_json_from_path(self, path):
        """
        ファイルパスからJSONデータを読み込みます。

        Parameters
        ----------
        path : str
            JSONファイルのパス

        Returns
        -------
        dict
            JSONファイルから読み込んだデータ
        """
        with open(path) as f:
            manifest_json = json.load(f)
        return manifest_json

    def get_manifest_json_from_url(self, iiif_manifest_url):
        """
        URLからJSONデータを取得します。すでに取得済みの場合はそのデータを返します。

        Parameters
        ----------
        iiif_manifest_url : str
            JSONデータのURL

        Returns
        -------
        dict
            URLから取得したJSONデータ
        """
        hs = hashlib.md5(iiif_manifest_url.encode()).hexdigest()
        path = f"{self.tmp_dir}/manifest/{hs}.json"

        if not os.path.exists(path):
            os.makedirs(os.path.dirname(path), exist_ok=True)
            df = requests.get(iiif_manifest_url).json()
            json.dump(df, open(path, "w"), ensure_ascii=False, indent=4)

        return self.get_manifest_json_from_path(path)
    
    def _initialize_canvas(self, newPdfPage, image_size):
        """
        Set the canvas page size from the image orientation and return it.

        Images wider than tall get landscape A4; everything else (including
        square images) gets portrait A4.

        Parameters
        ----------
        newPdfPage : reportlab.pdfgen.canvas.Canvas
            Canvas whose page size is set
        image_size : tuple
            Image size (width, height)

        Returns
        -------
        dict
            Page size {'width': float, 'height': float}
        """
        width_px, height_px = image_size
        is_wide = width_px / height_px > 1.0

        dimensions = landscape(A4) if is_wide else A4
        newPdfPage.setPageSize(dimensions)

        page_width, page_height = dimensions
        return {'width': page_width, 'height': page_height}
    
    def _draw_on_canvas(self, newPdfPage, annotations, image_size, page_size, font_page_limit, default_color, default_alpha, default_main_color, default_main_alpha):
        """
        Draw the image and its annotations on the canvas.

        NOTE: this is currently a placeholder. The method has no statements
        besides this docstring, so calling it does nothing and returns None.

        Parameters
        ----------
        newPdfPage : reportlab.pdfgen.canvas.Canvas
            Canvas to draw on
        annotations : list
            List of annotations to draw
        image_size : tuple
            Image size (width, height)
        page_size : dict
            Page size {'width': float, 'height': float}
        font_page_limit : int
            Upper limit of the font size
        default_color : str
            Default colour
        default_alpha : float
            Default opacity
        default_main_color : str
            Default colour of the main text
        default_main_alpha : float
            Default opacity of the main text
        """
        # ... annotation drawing code ...

    def convert_iiif2pdf(self, output_path, iiif_manifest_url=None, iiif_manifest_path=None, post_text_size=5,
                         default_color="red", default_alpha=0.5, default_main_color="gray", default_main_alpha=0.5,
                         canvas_range=None, font_page_limit=24, task_id="base"):
        """
        IIIFマニフェストをPDFに変換します。

        Parameters
        ----------
        output_path : str
            出力するPDFのパス
        iiif_manifest_url : str, optional
            IIIFマニフェストのURL
        iiif_manifest_path : str, optional
            IIIFマニフェストのファイルパス
        post_text_size : int, optional
            ポストテキストのサイズ
        default_color : str, optional
            デフォルトの色
        default_alpha : float, optional
            デフォルトの透明度
        default_main_color : str, optional
            メインテキストのデフォルトの色
        default_main_alpha : float, optional
            メインテキストのデフォルトの透明度
        canvas_range : list, optional
            キャンバスの範囲 [start, end]
        font_page_limit : int, optional
            フォントサイズの上限
        task_id : str, optional
            タスクID
        compress_quality : int, optional
            圧縮率
        """
        # ... method content ...

        '''
        newPdfPage = canvas.Canvas(output_path)
        image_size = (image_width, image_height)

        page_size = self._initialize_canvas(newPdfPage, image_size)
        self._draw_on_canvas(newPdfPage, annotations, image_size, page_size, font_page_limit,
                             default_color, default_alpha, default_main_color, default_main_alpha)

        newPdfPage.save()
        '''
        pass

    def convert_iiif2pdf(self, output_path, iiif_manifest_url = None, iiif_manifest_path = None, post_text_size = 0, default_color = "red", default_alpha=0.0, default_main_color = "gray", default_main_alpha = 0.0, canvas_range=None, font_page_limit = 24, task_id = "base", compress_quality = 10):
        """
        IIIFマニフェストをPDFに変換します。

        Parameters
        ----------
        output_path : str
            出力するPDFのパス
        iiif_manifest_url : str, optional
            IIIFマニフェストのURL
        iiif_manifest_path : str, optional
            IIIFマニフェストのファイルパス
        post_text_size : int, optional
            ポストテキストのサイズ
        default_color : str, optional
            デフォルトの色
        default_alpha : float, optional
            デフォルトの透明度
        default_main_color : str, optional
            メインテキストのデフォルトの色
        default_main_alpha : float, optional
            メインテキストのデフォルトの透明度
        canvas_range : list, optional
            キャンバスの範囲 [start, end]
        font_page_limit : int, optional
            フォントサイズの上限
        task_id : str, optional
            タスクID
        compress_quality : int, optional
            圧縮率
        """

        if iiif_manifest_url is None and iiif_manifest_path is None:
            raise Exception('iiif_manifest_url or iiif_manifest_path must be specified.')

        # pass
        newPdfPage = canvas.Canvas(output_path)

        pdfmetrics.registerFont(UnicodeCIDFont('HeiseiKakuGo-W5', isVertical=True))

        if iiif_manifest_url is not None:
            
            manifest_json = self.get_manifest_json_from_url(iiif_manifest_url)

        elif iiif_manifest_path is not None:
            manifest_json = self.get_manifest_json_from_path(iiif_manifest_path)

        contexts = manifest_json["@context"]
        if not isinstance(contexts, list):
            contexts = [contexts]
        if contexts[0] != "http://iiif.io/api/presentation/3/context.json":
            raise Exception("Not supported context")
        
        canvases = manifest_json["items"]

        if canvas_range is not None:
            canvases = canvases[canvas_range[0]:canvas_range[1]]

        for iiif_canvas in tqdm(canvases):
            image_url = iiif_canvas["items"][0]["items"][0]["body"]["id"]

            image_hash = hashlib.md5(image_url.encode()).hexdigest()

            img_path = f"{self.tmp_dir}/{task_id}/images/{image_hash}.jpg"

            if not os.path.exists(img_path):
                os.makedirs(os.path.dirname(img_path), exist_ok=True)
                df = requests.get(image_url).content
                open(img_path, "wb").write(df)

            # fliped_img_path = img_path + ".fliped.jpg"
            im = Image.open(img_path)
            image_width, image_height = im.size

            # ページサイズ
            page_size = {}

            ratio = image_width / image_height


            page_size = {}


            if ratio > 1.0 :
                newPdfPage.setPageSize(landscape(A4))
                page_size['width'], page_size['height'] = landscape(A4)
            else:
                newPdfPage.setPageSize(A4)
                page_size['width'], page_size['height'] = A4

            # 画像のほうが横に長い

            p_height = page_size['height'] / image_height
            p_width = page_size['width'] / image_width

            '''
            print("p_height", p_height)
            print("p_width", p_width)
            '''
            
            # 小さい方のスケールを維持する
            scale = min(p_height, p_width)

            '''
            image_page_height = image_height * scale
            image_page_width = image_width * scale
            
            print("image_original_height", image_height)
            print("image_original_width", image_width)

            print("image_page_height", image_page_height)
            print("image_page_width", image_page_width)
            '''

            '''
            im_resized = im.resize((int(image_page_width), int(image_page_height))) # , Image.LANCZOS
            
            im_resized_path = img_path + ".resized.jpg"
            
            im_resized.save(im_resized_path)

            im_webp_path = img_path + ".webp"

            im.save(im_webp_path, optimize=True, quality=comp_q)
            '''

            im_jpg_path = img_path + ".comp.jpg"

            im.save(im_jpg_path, optimize=True, quality=compress_quality)

            # newPdfPage.drawImage(im_resized_path,0,0,width=page_size['width'], height=page_size['height'], preserveAspectRatio=True)
            # newPdfPage.drawImage(im_webp_path,0,0,width=page_size['width'], height=page_size['height'], preserveAspectRatio=True)
            newPdfPage.drawImage(im_jpg_path,0,0,width=page_size['width'], height=page_size['height'], preserveAspectRatio=True)

            offset_page_x = (page_size['width'] - image_width * scale) / 2
            offset_page_y = (page_size['height'] - image_height * scale) / 2

            annotations = iiif_canvas["annotations"][0]["items"]

            sorted_annotations = self.sort_annotation(annotations)

            prev_group = None

            for i in range(len(sorted_annotations)):
                row = sorted_annotations[i]
                x1 = row["x1"]
                y1 = row["y1"]
                x2 = row["x2"]
                text_value = row["text"]
                w = row["w"]
                h = row["h"]

                if len(text_value) == 0:
                    continue

                anchor_y = image_height - y1
                anchor_x = x1

                text_height = h / len(text_value)
                font_image_size = text_height

                font_page_size = font_image_size * scale

                if font_page_size > font_page_limit:
                    # continue
                    font_page_size = font_page_limit

                newPdfPage.setFont('HeiseiKakuGo-W5', font_page_size)

                preText = ""

                postText = self.getPostText(i, sorted_annotations, size=post_text_size)

                start = (anchor_y + text_height * len(preText))

                fixed_text_value = preText + text_value + postText

                color, alpha = self.get_color(prev_group, row, default_color=default_color, default_alpha=default_alpha, default_main_color=default_main_color, default_main_alpha=default_main_alpha)

                newPdfPage.setFillColor(color, alpha=alpha)

                # newPdfPage.drawString(scale * anchor_x, scale * start, fixed_text_value)

                x_start = anchor_x + font_image_size / 2
                x_start = (x1 + x2) / 2

                x_page_start = offset_page_x + scale * x_start
                y_page_start = offset_page_y + scale * start

                newPdfPage.drawString(x_page_start, y_page_start, fixed_text_value)

                prev_group = row["group"]

                # break

            newPdfPage.showPage()

        newPdfPage.save()

    def get_color(self, prev_group, row, default_color = "red", default_alpha = 0.5, default_main_color = "gray", default_main_alpha = 0.5):
        """
        Choose the fill colour and opacity for one annotation row.

        Rows of type "本文" always get the main-text colour and opacity. For
        other rows the opacity is ``default_alpha``; the colour is
        ``default_color`` unless the row's group differs from ``prev_group``,
        in which case a random colour name is drawn from the ``colors`` module.

        Parameters
        ----------
        prev_group : str or None
            Group of the previously drawn row
        row : dict
            Row with at least the keys "group" and "type"
        default_color : str, optional
            Default colour
        default_alpha : float, optional
            Default opacity
        default_main_color : str, optional
            Default colour of the main text
        default_main_alpha : float, optional
            Default opacity of the main text

        Returns
        -------
        tuple
            (color, alpha)
        """
        picked_color = default_color

        if prev_group != row["group"]:
            # Every attribute of the colors module that is a Color instance is a candidate.
            candidates = []
            for attr_name in dir(colors):
                if isinstance(getattr(colors, attr_name), colors.Color):
                    candidates.append(attr_name)
            picked_color = random.choice(candidates)

        # Main text overrides whatever was picked above.
        if row["type"] == "本文":
            return default_main_color, default_main_alpha

        return picked_color, default_alpha

    def sort_annotation(self, annotations):
        xy_map = {}

        for annotation in annotations:
            xywh = annotation["target"].split("#xywh=")[1].split(",")
            x1 = int(xywh[0])
            y1 = int(xywh[1])
            w = int(xywh[2])
            h = int(xywh[3])

            x2 = x1 + w
            y2 = y1 + h

            text = annotation["body"]["value"]

            # text_type
            if text.startswith("【"):
                text_type, text_value = text[1:].split("】")
            else:
                text_type = "本文"
                text_value = text

            row = {
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "text": text_value,
                "type": text_type,
                "w": w,
                "h": h,
            }

            # rows.append(row)

            x_key = str(100000 - x1).zfill(8)
            # y_key = str(y1).zfill(8)

            xy_key = f"{x_key}" # -{y_key}"

            if text_type not in xy_map:
                xy_map[text_type] = {}

            if xy_key not in xy_map[text_type]:
                xy_map[text_type][xy_key] = []

            xy_map[text_type][xy_key].append(row)

        '''
        with open("data/p1/xy_map.json", "w") as f:
            json.dump(xy_map, f, indent=4, ensure_ascii=False)
        '''

        rows2 = []

        for text_type in xy_map:

            rows = []

            tmp_map = xy_map[text_type]

            for xy_key in sorted(tmp_map):
                lines = tmp_map[xy_key]
                for line in lines:
                    rows.append(line)
            

            if text_type == "本文":
                # Initialize the list of groups
                groups = []

                # For each object, check whether it overlaps with an existing group
                for obj in rows:
                    # This flag checks whether the object has been added to a group
                    added = False
                    for group in groups:
                        # If the object overlaps with the group, add it to the group
                        if obj["x1"] <= group[-1]["x2"] and obj["x2"] >= group[0]["x1"]:
                            group.append(obj)
                            group.sort(key=lambda x: x['x1'])  # Sort the group by x_start
                            added = True
                            break
                    # If the object does not overlap with any group, create a new group
                    if not added:
                        groups.append([obj])

                # Print the groups
                for i, group in enumerate(groups):
                    # print(f"Group {i+1}:")
                    # print(json.dumps(group, ensure_ascii=False))

                    tmp = {}

                    for obj in group:

                        y1 = obj["y1"]
                        if y1 not in tmp:
                            tmp[y1] = []

                        tmp[y1].append(obj)

                    for y1 in sorted(tmp):
                        objs = tmp[y1]
                        for obj in objs:
                            obj["group"] = f"{text_type}"
                            rows2.append(obj)

            elif text_type == "頭注":
                objects = rows
                # Calculate the average width of objects
                widths = [(obj["x2"] - obj["x1"]) for obj in objects]
                average_width = sum(widths) / len(widths)

                # Initialize the list of groups
                groups = [[objects[0]]]

                # For each object, check whether it overlaps with the last object in the current group
                for obj in objects[1:]:
                    # If the gap is less than half of the average width, add the object to the current group
                    if groups[-1][-1]["x1"] - obj["x2"]  < average_width / 2:
                        groups[-1].append(obj)
                    else:
                        # Otherwise, start a new group
                        groups.append([obj])

                # Print the groups
                for i, group in enumerate(groups):
                    # print(f"Group {i+1}: {group}")
                    for obj in group:
                        obj["group"] = f"{text_type}-{i + 1}"
                        rows2.append(obj)

            elif text_type == "割注":
                objects = rows
                # Calculate the average width of objects
                widths = [(obj["x2"] - obj["x1"]) for obj in objects]
                average_width = sum(widths) / len(widths)

                # Initialize the list of groups
                groups = [[objects[0]]]

                # For each object, check whether it overlaps with the last object in the current group
                for obj in objects[1:]:
                    # If the gap is less than half of the average width, add the object to the current group
                    if groups[-1][-1]["x1"] - obj["x2"]  < average_width / 2:
                        groups[-1].append(obj)
                    else:
                        # Otherwise, start a new group
                        groups.append([obj])

                # Print the groups
                for i, group in enumerate(groups):
                    # print(f"Group {i+1}: {len(group)} {group}")

                    # このグループのなかで、さらに、y座標に重なりがあるかを調べる

                    groups2 = self.divideByY(group)

                    for j, group2 in enumerate(groups2):

                        for obj in group2:
                            # for obj in group:
                            obj["group"] = f"{text_type}-{i + 1}-{j + 1}"
                            
                            rows2.append(obj)
                            # rows2.append(obj)
            
            else:
                for i, obj in enumerate(rows):
                    obj["group"] = f"{text_type}-{i+1}"
                    rows2.append(obj)

        
        with open(f"{self.tmp_dir}/sorted_annotaions.json", "w") as f:
            json.dump(rows2, f, indent=4, ensure_ascii=False)
        

        return rows2


    def getPostText(self, i, rows, size=5):
        """
        Return the start of the next text that belongs to the same group as row i.

        Parameters
        ----------
        i : int
            Index of the current row
        rows : list
            Row dicts with the keys "group" and "text"
        size : int, optional
            Number of leading characters to take from the next text

        Returns
        -------
        str
            The first ``size`` characters of the next same-group row's text,
            or "" when no later row shares the group
        """
        wanted_group = rows[i]["group"]
        later_snippets = (
            rows[k]["text"][:size]
            for k in range(i + 1, len(rows))
            if rows[k]["group"] == wanted_group
        )
        return next(later_snippets, "")
    
    def divideByY(self, objects):
        """
        Cluster rows whose vertical extents overlap.

        Rows are taken in order. A row joins the first existing cluster it
        overlaps, where the cluster's extent is read from its first member's
        y1 and its last member's y2; after joining, the cluster is re-sorted by
        descending x1. A row that overlaps no cluster starts a new one.

        Parameters
        ----------
        objects : list
            Row dicts with the keys "x1", "y1" and "y2"

        Returns
        -------
        list
            List of clusters, each a list of rows
        """
        clusters = []

        for item in objects:
            for cluster in clusters:
                cluster_top = cluster[0]["y1"]
                cluster_bottom = cluster[-1]["y2"]
                if item["y2"] >= cluster_top and item["y1"] <= cluster_bottom:
                    cluster.append(item)
                    # Keep members ordered right to left.
                    cluster.sort(key=lambda member: -member['x1'])
                    break
            else:
                # No overlap with any existing cluster.
                clusters.append([item])

        return clusters
