// TODO: rename this file to pdf_search
import { PDFDocumentProxy, TextContent, PDFPageProxy, TextContentItem } from 'pdfjs-dist';
import Immutable from 'immutable';
import { defaults, defer, findIndex, get, isEmpty, keys, last, range, uniqueId } from 'lodash/fp';
import { mapKeys } from 'lodash';
import { SearchMatchResult, getQueryRE, getMatchRanges } from './pdf_search_utils';
/**
 * Search matches grouped first by page and then by text item.
 *
 * Outer keys are the page numbers the matches were reported for; inner keys are the indexes of
 * the text items within that page. Each entry lists the match fragments found in that item.
 */
export interface SearchResults {
  [pageIndex: number]: {
    [itemIndex: number]: SearchMatchResult[];
  };
}

/**
 * Callback a search consumer may supply through `SearchConfig.onProgress`.
 *
 * It receives a snapshot of the accumulated results, whether the search stream has ended, and
 * whether the search is considered cancelled.
 */
type SearchProgressCallback = ({
  progress,
  done,
}: {
  // results accumulated so far together with the progress ratio
  progress: SearchProgress;
  // true once the search stream has been fully consumed
  done: boolean;
  // true when the search is no longer registered under its key
  cancelled: boolean;
}) => any;

/**
 * Options accepted by a document search. All fields are optional.
 */
export interface SearchConfig {
  // invoked with the accumulated state as the search advances and once more when it ends
  onProgress?: SearchProgressCallback;
  pages?: number[]; // page numbers to perform the search on; when omitted every page is searched
  pageItems?: {
    // Range of text items, per page, to restrict the search to. When this is used, `pages` must
    // be provided as well (the search is rejected otherwise).
    // Keys are page numbers.
    [key: number]: {
      // index of the first text item to search; defaults to 0
      startItem?: number;
      // index of the last text item to search (inclusive); defaults to the page's last item
      endItem?: number;
    };
  };
  // forwarded to `getQueryRE` as part of the config — presumably matches whole words only
  matchWholeWord?: boolean;
  // forwarded to `getQueryRE` as part of the config — presumably makes matching case sensitive
  caseSensitive?: boolean;
}

/**
 * Snapshot of a running (or finished) search.
 */
export interface SearchProgress {
  // matches found so far; pages without any match are left out
  results: SearchResults;
  // number of pages processed so far divided by the number of pages to search
  progress: number;
}

/**
 * One text item of a page together with the range it occupies in the concatenated page string.
 */
interface PageConentItem {
  // text of the item
  itemString: string;
  // offset of the item's first character within the page string (inclusive)
  startOffset: number;
  // offset just past the item's last character within the page string (exclusive)
  endOffset: number;
}

/**
 * Searchable representation of a page (or of a slice of its text items).
 */
interface DocPageContent {
  // all item strings joined together, in order, with no separator
  content: string;
  // the items that make up `content`, each with its offsets within it
  items: PageConentItem[];
}

// Values used for the options a caller leaves unset.
const DEFAULT_SEARCH_CONFIG: SearchConfig = {
  matchWholeWord: false,
  caseSensitive: false,
};

/**
 * Creates an accumulator for the results of a single search.
 *
 * @param pagesCount number of pages the search is going to visit; used as the denominator of the
 *   progress ratio
 * @returns an object exposing `addPageResults` (chainable) and `getState`
 */
function getSearchProgressProvider(pagesCount: number) {
  // every page reported so far, including pages that produced no match
  let accumulated: SearchResults = {};
  let ratio = 0;

  class SearchProgressProvider {
    /** Returns the pages that have at least one match plus the current progress ratio. */
    getState(): SearchProgress {
      const pagesWithMatches: SearchResults = {};

      for (const pageKey of keys(accumulated)) {
        const pageMatches = accumulated[Number(pageKey)];

        if (!isEmpty(pageMatches)) {
          pagesWithMatches[Number(pageKey)] = pageMatches;
        }
      }

      return { results: pagesWithMatches, progress: ratio };
    }

    /** Merges the matches of one page into the accumulated results and updates the ratio. */
    addPageResults(pageNum: number, matchedItems: TextItemsSearchMatches) {
      const merged = Immutable.fromJS(accumulated).mergeDeep({ [pageNum]: matchedItems });

      accumulated = merged.toJS();
      // a page counts as processed as soon as it has an entry, with or without matches
      ratio = pagesCount === 0 ? 0 : keys(accumulated).length / pagesCount;

      return this;
    }
  }

  return new SearchProgressProvider();
}

/**
 * Joins the strings of the given text items into one page string and records, for every item,
 * the range it occupies in that string.
 *
 * @param items text items of a page (or of a slice of a page), in reading order
 * @returns the concatenated string and one entry per input item holding its text, its start
 *   offset (inclusive) and its end offset (exclusive)
 */
function contentItemsToDocPageContent(items: TextContentItem[]): DocPageContent {
  const pageItems: PageConentItem[] = [];
  const chunks: string[] = [];
  // offset within the page string at which the next item starts
  let nextOffset = 0;

  // single pass: appending in place keeps this linear in the number of items
  for (const { str } of items) {
    const endOffset = nextOffset + str.length;

    pageItems.push({ startOffset: nextOffset, endOffset, itemString: str });
    chunks.push(str);
    nextOffset = endOffset;
  }

  return { content: chunks.join(''), items: pageItems };
}
// Matches of a single page, keyed by the index of the text item that holds them.
type TextItemsSearchMatches = { [itemIndex: number]: SearchMatchResult[] };

/**
 * Splits matches expressed as ranges of the page string into per-item fragments.
 *
 * A match may span several text items; it then produces one fragment for each item it touches.
 *
 * @param searchMatches matches found in the page string (`offset` and `length` are read)
 * @param pageContentItems the items the page string was built from, with their offsets
 * @returns for every item index touched by a match, the fragments falling inside that item, in
 *   the order the matches were given. Matches that cannot be located in any item are ignored.
 */
function pageMatchesToPerItemMatches(
  searchMatches: SearchMatchResult[],
  pageContentItems: PageConentItem[]
): TextItemsSearchMatches {
  // plain accumulator with explicit appends, so several matches landing in the same item are
  // all kept
  const itemsSearchMatches: TextItemsSearchMatches = {};

  for (const { offset: matchStartOffset, length: matchLength } of searchMatches) {
    const matchEndOffset = matchStartOffset + matchLength;
    // the match covers every item from the one holding its start offset up to the one holding
    // its end offset
    const startingItemIndex = pageContentItems.findIndex(
      ({ startOffset, endOffset }) =>
        matchStartOffset >= startOffset && matchStartOffset < endOffset
    );

    if (startingItemIndex === -1) {
      // no item contains the start of the match (e.g. it sits at the very end of the content);
      // slicing from -1 would attribute it to the wrong item under a negative key
      continue;
    }

    const endingItemIndex = pageContentItems.findIndex(
      ({ endOffset }, idx) => idx >= startingItemIndex && endOffset >= matchEndOffset
    );

    if (endingItemIndex === -1) {
      // the match runs past the last item: nothing can be attributed reliably
      continue;
    }

    pageContentItems
      .slice(startingItemIndex, endingItemIndex + 1)
      .forEach(({ startOffset: itemStartOffset, endOffset: itemEndOffset, itemString }, idx) => {
        // offset within the item's own string: only the first item of a match can start
        // mid-string, every following item continues the match from its first character
        const offset = idx === 0 ? matchStartOffset - itemStartOffset : 0;
        // how much of this item's string is consumed by the match
        const length =
          Math.min(matchEndOffset, itemEndOffset) - Math.max(matchStartOffset, itemStartOffset);
        const itemIndex = startingItemIndex + idx;
        const fragment = { offset, length, match: itemString.slice(offset, offset + length) };
        const existing = itemsSearchMatches[itemIndex];

        if (existing) {
          existing.push(fragment);
        } else {
          itemsSearchMatches[itemIndex] = [fragment];
        }
      });
  }

  return itemsSearchMatches;
}

// Chunk emitted by the search stream: the matches found on one page.
type SearchStreamValue = {
  // number of the page the matches belong to
  pageNum: number;
  // matches of that page, keyed by text item index
  matchedItems: TextItemsSearchMatches;
};

/**
 * Builds a stream that searches the document page by page and emits one chunk per page.
 *
 * The text content of every visited page is stored in `getDocSearchStream.cache`, keyed by
 * document and page number, and reused by later searches. Nothing in this function evicts
 * entries from that cache.
 *
 * @param doc document to search
 * @param query text to look for
 * @param config search options; `pages` limits the pages visited and `pageItems` limits the
 *   text items searched on a page
 * @returns a stream of `{ pageNum, matchedItems }` chunks that closes after the last page
 */
function getDocSearchStream(
  doc: PDFDocumentProxy,
  query: string,
  config: SearchConfig
): ReadableStream<SearchStreamValue> {
  // flipped by the `cancel` hook so the page loop can stop quietly instead of running into a
  // throwing `enqueue` on an already cancelled stream
  let cancelled = false;

  // TODO: check if ReadableStream needs to be polyfilled
  return new ReadableStream<SearchStreamValue>({
    async start(controller) {
      const queryRE = getQueryRE(query, config);
      const { pages } = config;
      const pagesNumsToSearch = pages ? pages : range(1, doc.numPages + 1);

      for (const pageNum of pagesNumsToSearch) {
        if (
          !getDocSearchStream.cache.has(doc) ||
          !getDocSearchStream.cache.get(doc)!.has(pageNum)
        ) {
          const page: PDFPageProxy = await doc.getPage(pageNum);
          const fetchedContent = await page.getTextContent();
          getDocSearchStream.cache = getDocSearchStream.cache.setIn(
            [doc, pageNum],
            fetchedContent
          );
        }

        // the awaits above are the only points where a cancellation can slip in; the fetched
        // text stays cached, but nothing more is searched or emitted
        if (cancelled) {
          return;
        }

        const pageContent = getDocSearchStream.cache.getIn([doc, pageNum]);
        const startItem = get(['pageItems', pageNum, 'startItem'], config) ?? 0;
        const endItem =
          get(['pageItems', pageNum, 'endItem'], config) ?? pageContent.items.length - 1;

        // concat all items contents into one big page content string, keeping record of each
        // item range within that string
        const { content: pageStr, items } = contentItemsToDocPageContent(
          pageContent.items.slice(startItem, endItem + 1)
        );
        // get matches for the search query within page string
        const pageMatches = getMatchRanges(pageStr, queryRE);
        // convert matched text ranges into per item match range (one matched string may take
        // more than one item's content)
        const perItemMatches = pageMatchesToPerItemMatches(pageMatches, items);
        // finally shift the item indexes if the search was done on a portion of the page that
        // does not start at its first item (startItem is not 0)
        const matchedItems =
          startItem > 0
            ? // using lodash mapKeys instead of lodash/fp due to no single argument cap
              mapKeys(perItemMatches, (_v, itemIdx) => parseInt(itemIdx, 10) + startItem)
            : perItemMatches;

        controller.enqueue({ pageNum, matchedItems });
      }

      // once reached here, the search is over. Stream can be closed
      controller.close();
    },
    cancel() {
      cancelled = true;
    },
  });
}
// Text content of already visited pages: document -> page number -> text content.
type DocSearchCache = Immutable.Map<PDFDocumentProxy, Immutable.Map<number, TextContent>>;

// The cache lives on the function object itself and starts out empty.
getDocSearchStream.cache = Immutable.Map() as DocSearchCache;

/**
 * Starts a search and pumps its stream, reporting every chunk to `onProgress`.
 *
 * Pumping runs in the background: the returned promise resolves with the stream reader right
 * away, without waiting for the search to end, so the caller can cancel through the reader.
 *
 * @param doc document to search
 * @param query text to look for
 * @param onProgress called with each page's results (`done: false`) and one final time with
 *   `done: true` and no results when the stream ends
 * @param searchConfig search options, passed on to the search stream
 * @param onError optional; called when reading the stream fails or `onProgress` throws. When
 *   omitted the failure is re-thrown, so it still surfaces as an unhandled rejection rather than
 *   being dropped silently.
 * @returns the reader of the search stream
 */
async function searchDoc(
  doc: PDFDocumentProxy,
  query: string,
  onProgress: ({ pageResults, done }: { pageResults?: SearchStreamValue; done: boolean }) => any,
  searchConfig: SearchConfig,
  onError?: (error: unknown) => void
): Promise<ReadableStreamDefaultReader<SearchStreamValue>> {
  const searchStream = getDocSearchStream(doc, query, searchConfig);
  const reader: ReadableStreamDefaultReader<SearchStreamValue> = searchStream.getReader();

  // reads chunk after chunk until the stream reports it is done
  const pump = async (): Promise<void> => {
    let finished = false;

    while (!finished) {
      const { value: pageResults, done } = await reader.read();

      onProgress({ pageResults, done });
      finished = done;
    }
  };

  pump().catch((error: unknown) => {
    if (onError) {
      onError(error);
      return;
    }

    throw error;
  });

  return reader;
}

/**
 * What a caller gets back when requesting a search.
 */
export interface PDFSearchHandle {
  // settles with the search results
  promise: Promise<SearchResults>;
  // key to pass to `cancelSearch`; null when no actual search was started
  key: string | null;
}

/**
 * Starts text searches over PDF documents and keeps track of the ones still running so that they
 * can be cancelled by key.
 */
class PDFSearch {
  /** Readers of the searches currently in flight, keyed by the key handed out with the handle. */
  searches: Immutable.Map<string, ReadableStreamDefaultReader<SearchStreamValue>>;

  constructor() {
    this.searches = Immutable.Map();
  }

  /**
   * Cancels a running search. Unknown keys (including searches that already ended) are ignored.
   *
   * @param searchKey key obtained from the search handle
   */
  cancelSearch(searchKey: string) {
    const searchReader = this.searches.get(searchKey);
    if (searchReader) {
      searchReader.cancel();
      this.searches = this.searches.delete(searchKey);
    }
  }

  /**
   * Returns a search function bound to the given document.
   *
   * The search function takes the text to look for and an optional config and returns a handle:
   * - `pageItems` without `pages` is a bad config: the promise rejects and the key is null;
   * - blank search text is not searched: the promise resolves with `{}` and the key is null;
   * - otherwise a search is started; the promise resolves with the results when the search ends
   *   (also when it ends because it was cancelled) and rejects if the search fails.
   */
  getDocSearch =
    (Doc: PDFDocumentProxy) =>
    (searchText: string, searchConfig?: SearchConfig): PDFSearchHandle => {
      // validate search config
      if (searchConfig && searchConfig.pageItems && !searchConfig.pages) {
        return {
          promise: Promise.reject(
            'Bad search config: `pages` not provided. Either provide `pages` or remove `pageItems`.'
          ),
          key: null,
        };
      }
      // empty search text should not be searched
      if (isEmpty(searchText.trim())) {
        return {
          promise: Promise.resolve({}),
          key: null,
        };
      }
      // initiate real search
      const searchKey = uniqueId('');
      const config = defaults(DEFAULT_SEARCH_CONFIG, searchConfig);

      const searchResultsPromise: Promise<SearchResults> = new Promise((resolve, reject) => {
        const searchProgress = getSearchProgressProvider(
          config.pages ? config.pages.length : Doc.numPages
        );
        // whether the reader has been stored under `searchKey`
        let registered = false;
        // frozen once the search ends, so that removing the finished search from `searches`
        // is not mistaken for a cancellation by progress callbacks that run afterwards
        let cancelledAtEnd: boolean | undefined;

        // a registered search that is gone from `searches` can only have been cancelled
        const isCancelled = (): boolean =>
          cancelledAtEnd ?? (registered && !this.searches.has(searchKey));

        // records the final cancellation state and drops the reader, so finished searches do
        // not pile up in `searches`
        const unregister = () => {
          if (cancelledAtEnd === undefined) {
            cancelledAtEnd = isCancelled();
            this.searches = this.searches.delete(searchKey);
          }
        };

        const progressHandler = ({
          pageResults,
          done,
        }: {
          pageResults?: SearchStreamValue;
          done: boolean;
        }) => {
          if (done) {
            unregister();
            resolve(searchProgress.getState().results);
          } else if (pageResults) {
            // accumulate results
            searchProgress.addPageResults(pageResults.pageNum, pageResults.matchedItems);
          }
          // notify consumer of search progress if she is interested
          defer(() => {
            config.onProgress?.({
              progress: searchProgress.getState(),
              done,
              cancelled: isCancelled(),
            });
          });
        };

        searchDoc(Doc, searchText, progressHandler, config)
          .then((searchReader: ReadableStreamDefaultReader<SearchStreamValue>) => {
            // a search over no pages can end before this point; do not register it then
            if (cancelledAtEnd === undefined) {
              registered = true;
              this.searches = this.searches.set(searchKey, searchReader);
            }
            // `closed` rejects when the stream errors; without this the promise would never
            // settle after a failure
            searchReader.closed.catch((err) => {
              unregister();
              reject(err);
            });
          })
          .catch((err) => reject(err));
      });

      return {
        promise: searchResultsPromise,
        key: searchKey,
      };
    };
}

// A single instance is created here and exported, so every importer of this module shares the
// same `searches` registry.
export default new PDFSearch();
