import { Box, Skeleton, Stack, Typography } from "@mui/material";
import React, { useEffect, useMemo, useRef } from "react";
import textMetricsLicenses from "../../charts/textMetricsLicenses";
import textMetricsSynthetic from "../../charts/textMetricsSynthetic";
import tasksSunburst from "../../charts/tasksSunburst";
import useAxios from "../../hooks/useAxios";
import sourceTree from "../../charts/sourceTree";
import { useExplorerData } from "../../hooks/useExplorerData";

import LoadingAnimation from "../LoadingAnimation";
import VisualizationContainer from "./VisualizationContainer";
import Titlebar from "./components/Titlebar";
import TextSummary from "./components/TextSummary";
import createMapPlaceholder from "./utils/createMapPlaceholder";
import SkeletonLoader from "./components/SkeletonLoader";

/**
 * Runs `draw` against a fresh <div> that is attached to `document.body` only
 * for the duration of the call, then returns that div detached again.
 *
 * The div is always detached, even when `draw` throws, so a failing chart
 * cannot leave stray nodes behind in the document.
 *
 * NOTE(review): the chart functions presumably need an attached node to
 * measure layout; that is why the div is mounted at all. Confirm against the
 * chart implementations.
 *
 * @param draw Callback that renders into the supplied container.
 * @returns The populated, detached container element.
 */
function renderDetached(
  draw: (container: HTMLDivElement) => void,
): HTMLDivElement {
  const container = document.createElement("div");
  document.body.appendChild(container);

  try {
    draw(container);
  } finally {
    // `remove()` is a no-op when the node has no parent, so it cannot mask an
    // error thrown by `draw` the way `removeChild` could.
    container.remove();
  }

  return container;
}

/**
 * "Text characteristics" section of the explorer.
 *
 * Builds four chart elements from the currently filtered explorer data
 * (two text-length scatter views, a task sunburst and a source-domain tree)
 * and hands each one to a `VisualizationContainer`. The task and domain
 * charts additionally depend on JSON constants fetched via `useAxios`; a
 * `SkeletonLoader` is rendered in their place until those responses arrive.
 */
export default function TextCharacteristics() {
  const explorerData = useExplorerData();
  const filteredData = explorerData.filteredData;

  const tasksGroupsUrl = "/constants/task_groups.json";
  const domainGroupsUrl = "/constants/domain_groups.json";

  const tasksGroupsRes = useAxios({ method: "get", url: tasksGroupsUrl });
  const domainGroupsRes = useAxios({ method: "get", url: domainGroupsUrl });

  // Text length vs. license category.
  const textMetricsLicensesMap = useMemo(() => {
    if (!filteredData) return createMapPlaceholder();

    return renderDetached((container) => {
      textMetricsLicenses(container, filteredData);
    });
  }, [filteredData]);

  // Text length vs. regular/synthetic text. Falls back to the same
  // placeholder as the other charts instead of returning `undefined`.
  const syntheticTextMap = useMemo(() => {
    if (!filteredData) return createMapPlaceholder();

    return renderDetached((container) => {
      textMetricsSynthetic(container, filteredData);
    });
  }, [filteredData]);

  const tasksGroupReadyToAdd =
    tasksGroupsRes.response && !tasksGroupsRes.loading;

  // Task category sunburst; `tasksSunburst` returns the element itself, so no
  // scratch container is needed here.
  const taskGroupsMap = useMemo(() => {
    if (!tasksGroupReadyToAdd) return createMapPlaceholder();

    return tasksSunburst(tasksGroupsRes.response, filteredData)!;
  }, [tasksGroupsRes.response, filteredData, tasksGroupReadyToAdd]);

  const domainGroupsReadyToAdd =
    domainGroupsRes.response && !domainGroupsRes.loading;

  // Source-domain tree; an empty list is passed while no data is selected.
  const domainGroupsMap = useMemo(() => {
    if (!domainGroupsReadyToAdd) return createMapPlaceholder();

    return renderDetached((container) => {
      sourceTree(domainGroupsRes.response, container, filteredData || []);
    });
  }, [domainGroupsReadyToAdd, filteredData, domainGroupsRes.response]);

  return (
    <Stack sx={{ gap: 5 }}>
      <Box>
        <TextSummary>
          This section covers various characteristics of the text in the
          datasets.
        </TextSummary>
      </Box>
      <Box>
        <Titlebar sx={{ mb: 4 }}>
          Text Length Metrics x License Category
        </Titlebar>

        <TextSummary>
          Text-to-text datasets are formatted as an input-target pair.
        </TextSummary>
        <TextSummary>
          Here each point is a dataset, showing its input text length (in
          characters), target text length (in characters), and license category.
        </TextSummary>

        <VisualizationContainer content={textMetricsLicensesMap} />
      </Box>
      <Box>
        <Titlebar sx={{ mb: 4 }}>
          Text Length Metrics x Regular/Synthetic Text
        </Titlebar>
        <TextSummary>
          New text-to-text datasets are often synthetically generated by large
          models like GPT-4.
        </TextSummary>
        <TextSummary>
          Here each point is a dataset, showing its input text length (in
          characters), target text length (in characters), and whether it is
          synthetically generated, or manually/human created.
        </TextSummary>

        <VisualizationContainer content={syntheticTextMap} />
      </Box>
      <Box>
        <Titlebar sx={{ mb: 4 }}>Task Category Distribution</Titlebar>

        <TextSummary>
          Here we measure the variety and distribution of tasks that the
          datasets represent -- i.e. what they're teaching a model to do.
        </TextSummary>

        {tasksGroupReadyToAdd ? (
          <VisualizationContainer content={taskGroupsMap} />
        ) : (
          <SkeletonLoader />
        )}
      </Box>
      <Box>
        <Titlebar sx={{ mb: 4 }}>Text Source Domains</Titlebar>

        <TextSummary>
          Many datasets are originally scraped from the web or other sources.
          For the data you've selected, we cluster the original sources by
          Domain, quantify them and show the top sources 5 per domain.
        </TextSummary>

        {domainGroupsReadyToAdd ? (
          <VisualizationContainer content={domainGroupsMap} />
        ) : (
          <SkeletonLoader />
        )}
      </Box>
    </Stack>
  );
}
