Skip to content

Feishu docs

FeishuDocsReader #

Bases: BaseReader

Feishu Docs reader.

Reads a page from Google Docs

Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class FeishuDocsReader(BaseReader):
    """
    Feishu Docs reader.

    Reads a page from Google Docs

    """

    host = "https://open.feishu.cn"
    documents_raw_content_url_path = "/open-apis/docx/v1/documents/{}/raw_content"
    tenant_access_token_internal_url_path = (
        "/open-apis/auth/v3/tenant_access_token/internal"
    )

    def __init__(self, app_id, app_secret) -> None:
        """

        Args:
            app_id: The unique identifier of the application is obtained after the application is created.
            app_secret: Application key, obtained after creating the application.

        """
        super().__init__()
        self.app_id = app_id
        self.app_secret = app_secret

        self.tenant_access_token = ""
        self.expire = 0

    def load_data(self, document_ids: List[str]) -> List[Document]:
        """
        Load data from the input directory.

        Args:
            document_ids (List[str]): a list of document ids.

        """
        if document_ids is None:
            raise ValueError('Must specify a "document_ids" in `load_kwargs`.')

        results = []
        for document_id in document_ids:
            doc = self._load_doc(document_id)
            results.append(Document(text=doc, extra_info={"document_id": document_id}))
        return results

    def _load_doc(self, document_id) -> str:
        """
        Load a document from Feishu Docs.

        Args:
            document_id: the document id.

        Returns:
            The document text.

        """
        url = self.host + self.documents_raw_content_url_path.format(document_id)
        if self.tenant_access_token == "" or self.expire < time.time():
            self._update_tenant_access_token()
        headers = {
            "Authorization": f"Bearer {self.tenant_access_token}",
            "Content-Type": "application/json; charset=utf-8",
        }
        response = requests.get(url, headers=headers)
        return response.json()["data"]["content"]

    def _update_tenant_access_token(self):
        """For update tenant_access_token."""
        url = self.host + self.tenant_access_token_internal_url_path
        headers = {"Content-Type": "application/json; charset=utf-8"}
        data = {"app_id": self.app_id, "app_secret": self.app_secret}
        response = requests.post(url, data=json.dumps(data), headers=headers)
        self.tenant_access_token = response.json()["tenant_access_token"]
        self.expire = time.time() + response.json()["expire"]

    def set_lark_domain(self):
        """The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain."""
        self.host = "https://open.larksuite.com"

load_data #

load_data(document_ids: List[str]) -> List[Document]

Load data from the input directory.

Parameters:

Name Type Description Default
document_ids List[str]

a list of document ids.

required
Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def load_data(self, document_ids: List[str]) -> List[Document]:
    """
    Load data from the input directory.

    Args:
        document_ids (List[str]): a list of document ids.

    """
    if document_ids is None:
        raise ValueError('Must specify a "document_ids" in `load_kwargs`.')

    results = []
    for document_id in document_ids:
        doc = self._load_doc(document_id)
        results.append(Document(text=doc, extra_info={"document_id": document_id}))
    return results

set_lark_domain #

set_lark_domain()

The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain.

Source code in llama-index-integrations/readers/llama-index-readers-feishu-docs/llama_index/readers/feishu_docs/base.py
102
103
104
def set_lark_domain(self):
    """The default API endpoints are for Feishu, in order to switch to Lark, we should use set_lark_domain."""
    self.host = "https://open.larksuite.com"