import { parseISO, isBefore, isAfter, min as minDate } from "date-fns";
import { LICENSE_USE_TYPES } from "./constants";

/**
 * Annotates every row of `dataSummary` (in place) with resolved license
 * classes, once per license source.
 *
 * For each row the function reads:
 *   - "Unique Dataset Identifier" (used as the lookup key),
 *   - "Licenses": entries carrying "License" and "License URL",
 *   - "Inferred Metadata" (optional): "HF Yaml License", "HF Config License",
 *     "GitHub License" and "PwC License Name".
 *
 * For each source that has license information for a row, three properties are
 * written: `License Use (<source>)`, `License Attribution (<source>)` and
 * `License Share Alike (<source>)`, where <source> is one of "DataProvenance",
 * "DataProvenance IgnoreOpenAI", "HuggingFace", "GitHub", "PapersWithCode".
 *
 * @param {Array<Object>} dataSummary - Rows to annotate; the row objects are mutated.
 * @param {Object} allConstants - Passed through unchanged to `classifyLicense`.
 * @returns {Array<Object>} The same `dataSummary` array that was passed in.
 */
function mapLicenseCriteria(dataSummary, allConstants) {
  // uid -> [[licenseName, licenseUrl], ...], one lookup table per source.
  const oursByUid = {};
  const oursWithoutOpenaiByUid = {};
  const huggingFaceByUid = {};
  const githubByUid = {};
  const papersWithCodeByUid = {};

  for (const row of dataSummary) {
    const uid = row["Unique Dataset Identifier"];

    const declaredPairs = row["Licenses"].map((info) => [
      info["License"],
      info["License URL"],
    ]);
    const nonOpenaiPairs = declaredPairs.filter(([name]) => name !== "OpenAI");

    oursByUid[uid] = declaredPairs;
    // A dataset whose only license is "OpenAI" is treated as unspecified in
    // the OpenAI-ignoring view.
    oursWithoutOpenaiByUid[uid] =
      nonOpenaiPairs.length > 0 ? nonOpenaiPairs : [["Unspecified", null]];

    const inferred = row["Inferred Metadata"] ?? {};
    const hfYamlLicense = inferred["HF Yaml License"];
    const hfConfigLicense = inferred["HF Config License"];
    const githubLicense = inferred["GitHub License"];
    const pwcLicense = inferred["PwC License Name"];

    // The YAML license replaces any existing entry; the config license is
    // appended to whatever is already recorded for this uid.
    if (hfYamlLicense) {
      huggingFaceByUid[uid] = [[hfYamlLicense, null]];
    }
    if (hfConfigLicense) {
      const existing = huggingFaceByUid[uid] || [];
      existing.push([hfConfigLicense, null]);
      huggingFaceByUid[uid] = existing;
    }
    if (githubLicense) {
      githubByUid[uid] = [[githubLicense, null]];
    }
    if (pwcLicense) {
      papersWithCodeByUid[uid] = [[pwcLicense, null]];
    }
  }

  // Classify each (name, url) pair, then collapse the list into one triple.
  const resolveByUid = (pairsByUid) =>
    Object.fromEntries(
      Object.entries(pairsByUid).map(([uid, pairs]) => [
        uid,
        resolveMultipleLicenses(
          pairs.map(([name, url]) => classifyLicense(name, url, allConstants)),
        ),
      ]),
    );

  // All sources are resolved before any row is written, in this fixed order.
  const resolvedBySource = [
    ["DataProvenance", resolveByUid(oursByUid)],
    ["DataProvenance IgnoreOpenAI", resolveByUid(oursWithoutOpenaiByUid)],
    ["HuggingFace", resolveByUid(huggingFaceByUid)],
    ["GitHub", resolveByUid(githubByUid)],
    ["PapersWithCode", resolveByUid(papersWithCodeByUid)],
  ];

  for (const [aggregator, resolvedClasses] of resolvedBySource) {
    for (const row of dataSummary) {
      const classes = resolvedClasses[row["Unique Dataset Identifier"]];
      if (!classes) {
        continue;
      }
      row[`License Use (${aggregator})`] = classes[0];
      row[`License Attribution (${aggregator})`] = classes[1];
      row[`License Share Alike (${aggregator})`] = classes[2];
    }
  }

  return dataSummary;
}

/**
 * Looks up the `[use, attribution, shareAlike]` class triple for one license
 * and normalises it into an object.
 *
 * "Custom" licenses are looked up by URL in
 * `allConstants.CUSTOM_LICENSE_CLASSES`; every other license is looked up by
 * name in `allConstants.LICENSE_CLASSES`. When the lookup does not yield an
 * array, the license is classified as unknown: `["?", "?", "?"]`.
 *
 * @param {string} licenseName - License name, or "Custom".
 * @param {?string} licenseUrl - License URL; only consulted for "Custom".
 * @param {Object} allConstants - Must provide `LICENSE_CLASSES` and
 *   `CUSTOM_LICENSE_CLASSES` lookup tables.
 * @returns {{use: string, attribution: number, share_alike: number}}
 *   `attribution` and `share_alike` are the parsed integers, or 1 when the
 *   stored value is not numeric (e.g. "?").
 */
function classifyLicense(licenseName, licenseUrl, allConstants) {
  const UNKNOWN_CLASSES = ["?", "?", "?"];

  const isCustom = licenseName === "Custom";
  const table = isCustom
    ? allConstants.CUSTOM_LICENSE_CLASSES
    : allConstants.LICENSE_CLASSES;
  const entry = table[isCustom ? licenseUrl : licenseName];

  // Guard with Array.isArray so that a missing key (undefined) or an inherited
  // Object.prototype member (e.g. "constructor") cannot break destructuring.
  const [useCase, attribution, shareAlike] = Array.isArray(entry)
    ? entry
    : UNKNOWN_CLASSES;

  // Non-numeric flags such as "?" default to 1, the stricter setting.
  const toFlag = (value) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? 1 : parsed;
  };

  return {
    use: useCase,
    attribution: toFlag(attribution),
    share_alike: toFlag(shareAlike),
  };
}

/**
 * Collapses the classified licenses of one dataset into a single
 * `[use, attribution, shareAlike]` triple.
 *
 * The use class is picked by restrictiveness: "?" or "Acad" give
 * "academic-only", then "NC" gives "non-commercial", then "Unspecified" gives
 * "unspecified", then "All" gives "commercial". If none of these codes is
 * present the result falls back to "academic-only" (the same outcome as the
 * unknown code "?") instead of leaving the use class undefined.
 * Attribution and share-alike are the maximum over all licenses.
 *
 * @param {Array<{use: string, attribution: number, share_alike: number}>} licenseCriterias
 * @returns {Array} `["", "", ""]` for an empty list, otherwise
 *   `[resolvedUse, maxAttribution, maxShareAlike]`.
 */
function resolveMultipleLicenses(licenseCriterias) {
  if (!licenseCriterias.length) {
    // Return empty if no licenses from this aggregator
    return ["", "", ""];
  }

  // Ordered from most to least restrictive; the first code present wins.
  const USE_PRIORITY = [
    ["?", "academic-only"],
    ["Acad", "academic-only"],
    ["NC", "non-commercial"],
    ["Unspecified", "unspecified"],
    ["All", "commercial"],
  ];
  const FALLBACK_USE = "academic-only";

  const presentUseCodes = new Set(licenseCriterias.map((l) => l.use));
  const match = USE_PRIORITY.find(([code]) => presentUseCodes.has(code));
  const resolvedUseCase = match ? match[1] : FALLBACK_USE;

  const resolvedAttribution = Math.max(
    ...licenseCriterias.map((l) => l.attribution),
  );
  const resolvedShareAlike = Math.max(
    ...licenseCriterias.map((l) => l.share_alike),
  );

  return [resolvedUseCase, resolvedAttribution, resolvedShareAlike];
}

/**
 * Keeps the rows whose every value under `field` belongs to one of the
 * selected groups.
 *
 * @param {Array<Object>} rows - Rows to filter (not mutated).
 * @param {string} field - Row property holding the values to check.
 * @param {Array<string>} selectedGroups - Group names chosen by the user.
 * @param {Object<string, Array<string>>} groupTable - Group name -> member
 *   values; unknown group names contribute no members.
 * @returns {Array<Object>} A new array with the matching rows.
 */
function filterRowsBySelectedGroups(rows, field, selectedGroups, groupTable) {
  const allowedValues = new Set(
    selectedGroups.flatMap((group) => groupTable[group] || []),
  );
  return rows.filter((row) =>
    Array.from(row[field] ?? []).every((value) => allowedValues.has(value)),
  );
}

/**
 * Applies the user-selected filters to `dataSummary` and returns the rows
 * that pass all of them. The input array is not modified, but when a start
 * or end time is given each row still in play gets an
 * "Estimated Creation Date" property written onto it.
 *
 * @param {Array<Object>} dataSummary - Rows to filter.
 * @param {Object} allConstants - Provides `LICENSE_CLASSES`, `LANGUAGE_GROUPS`,
 *   `TASK_GROUPS` and `DOMAIN_GROUPS`.
 * @param {?string} selectedCollection - Keep only rows of this `Collection`.
 * @param {*} selectedLicenses - When truthy, keep only rows whose licenses are
 *   all keys of `allConstants.LICENSE_CLASSES`.
 * @param {?string} selectedLicenseUse - An entry of `LICENSE_USE_TYPES`; rows
 *   are kept when their license use is that entry or one listed before it.
 * @param {boolean} openaiLicenseOverride - Use the "IgnoreOpenAI" license-use
 *   column instead of the default one.
 * @param {?(string|number)} selectedLicenseAttribution - Upper bound for
 *   "License Attribution (DataProvenance)".
 * @param {?(string|number)} selectedLicenseShareAlike - Upper bound for
 *   "License Share Alike (DataProvenance)".
 * @param {Array<string>} selectedLanguages - Language group names, or "All".
 * @param {Array<string>} selectedTaskCategories - Task group names, or "All".
 * @param {Array<string>} selectedDomains - Domain group names, or "All".
 * @param {?string} selectedStartTime - ISO date; earliest allowed creation date.
 * @param {?string} selectedEndTime - ISO date; latest allowed creation date.
 * @returns {Array<Object>} The rows that satisfy every active filter.
 */
function applyFilters(
  dataSummary,
  allConstants,
  selectedCollection,
  selectedLicenses,
  selectedLicenseUse,
  openaiLicenseOverride,
  selectedLicenseAttribution,
  selectedLicenseShareAlike,
  selectedLanguages,
  selectedTaskCategories,
  selectedDomains,
  selectedStartTime,
  selectedEndTime,
) {
  let filteredData = [...dataSummary];

  if (selectedCollection) {
    filteredData = filteredData.filter(
      (row) => row.Collection === selectedCollection,
    );
  }

  if (filteredData.length && selectedLicenses) {
    // NOTE(review): the values inside `selectedLicenses` are not consulted;
    // rows are only checked against the full set of known license names.
    // Confirm this is the intended meaning of the filter.
    const knownLicenses = new Set(Object.keys(allConstants.LICENSE_CLASSES));
    filteredData = filteredData.filter((row) =>
      row.Licenses.every((license) => knownLicenses.has(license.License)),
    );
  }

  if (filteredData.length && selectedLicenseUse) {
    const useKey = openaiLicenseOverride
      ? "License Use (DataProvenance IgnoreOpenAI)"
      : "License Use (DataProvenance)";
    // Everything up to and including the selected entry is acceptable.
    const selectedIdx = LICENSE_USE_TYPES.indexOf(selectedLicenseUse);
    const permittedUses = new Set(
      LICENSE_USE_TYPES.slice(0, selectedIdx + 1).map((use) =>
        use.toLowerCase(),
      ),
    );
    // Rows without a resolved license use are dropped rather than crashing.
    filteredData = filteredData.filter((row) =>
      permittedUses.has(String(row[useKey] ?? "").toLowerCase()),
    );
  }

  // NOTE(review): a numeric 0 threshold is falsy and therefore skips the two
  // filters below; a string "0" applies them. Confirm what callers pass.
  if (filteredData.length && selectedLicenseAttribution) {
    const maxAttribution = Number.parseInt(selectedLicenseAttribution, 10);
    filteredData = filteredData.filter(
      (row) => row["License Attribution (DataProvenance)"] <= maxAttribution,
    );
  }

  if (filteredData.length && selectedLicenseShareAlike) {
    const maxShareAlike = Number.parseInt(selectedLicenseShareAlike, 10);
    filteredData = filteredData.filter(
      (row) => row["License Share Alike (DataProvenance)"] <= maxShareAlike,
    );
  }

  if (filteredData.length && !selectedLanguages.includes("All")) {
    filteredData = filterRowsBySelectedGroups(
      filteredData,
      "Languages",
      selectedLanguages,
      allConstants.LANGUAGE_GROUPS,
    );
  }

  if (filteredData.length && !selectedTaskCategories.includes("All")) {
    filteredData = filterRowsBySelectedGroups(
      filteredData,
      "Task Categories",
      selectedTaskCategories,
      allConstants.TASK_GROUPS,
    );
  }

  if (filteredData.length && !selectedDomains.includes("All")) {
    filteredData = filterRowsBySelectedGroups(
      filteredData,
      "Text Sources",
      selectedDomains,
      allConstants.DOMAIN_GROUPS,
    );
  }

  if (filteredData.length && (selectedStartTime || selectedEndTime)) {
    const dateColumns = ["S2 Date", "HF Date", "GitHub Date"];

    // The estimated creation date is the earliest of the available dates.
    // It is written onto the row objects themselves, which are shared with
    // the caller's `dataSummary`.
    filteredData.forEach((row) => {
      const metadata = row["Inferred Metadata"] || {};
      const dates = dateColumns
        .map((col) => metadata[col])
        .filter((date) => date)
        .map((date) => parseISO(date));
      row["Estimated Creation Date"] = dates.length ? minDate(dates) : null;
    });

    // Rows without any date are excluded by both bounds.
    if (selectedStartTime) {
      const startTime = parseISO(selectedStartTime);
      filteredData = filteredData.filter(
        (row) =>
          row["Estimated Creation Date"] &&
          !isBefore(row["Estimated Creation Date"], startTime),
      );
    }
    if (selectedEndTime) {
      const endTime = parseISO(selectedEndTime);
      filteredData = filteredData.filter(
        (row) =>
          row["Estimated Creation Date"] &&
          !isAfter(row["Estimated Creation Date"], endTime),
      );
    }
  }

  return filteredData;
}
export { applyFilters };

export { classifyLicense, resolveMultipleLicenses };

export { mapLicenseCriteria };
