scrapbox

scrapbox-client

PyPI version CI

Scrapbox (Helpfeel Cosense) Client

Install

pip install scrapbox-client

CLI

$ sbc
usage: sbc [-h] [--version] [--connect-sid CONNECT_SID | --connect-sid-file CONNECT_SID_FILE] {pages,all-pages,page,text,icon,file} ...

Scrapbox API client CLI

positional arguments:
  {pages,all-pages,page,text,icon,file}
                        Available commands
    pages               Get page list from a project
    all-pages           Get all pages from a project
    page                Get detailed information about a page
    text                Get text content of a page
    icon                Get icon URL for a page
    file                Download a file from Scrapbox

options:
  -h, --help            show this help message and exit
  --version, -V         Show program's version number and exit
  --connect-sid CONNECT_SID
                        Scrapbox authentication cookie (connect.sid)
  --connect-sid-file CONNECT_SID_FILE
                        Path to file containing connect.sid (default: ~/.config/sbc/connect.sid)

examples:
  sbc pages my-project --limit 10 --skip 10 --json
  sbc all-pages my-project --batch-size 500 --json
  sbc page my-project "Page Title" --json
  sbc text my-project "Page Title"
  sbc icon my-project "Page Title"
  sbc file 60190edf1176d9001c13f8e8.png --output image.png

Priority of `connect.sid` sources (highest priority first; the first one found is used):
  1. --connect-sid argument
  2. --connect-sid-file argument
  3. ~/.config/sbc/connect.sid file
  4. SBC_CONNECT_SID environment variable

Library

Overview

from scrapbox.client import ScrapboxClient

PROJECT_NAME = "help-jp"
PAGE_TITLE = "ブラケティング"

# Access public project without authentication
with ScrapboxClient() as client:
    # Get page list
    pages = client.get_pages(PROJECT_NAME, skip=0, limit=5)
    print(f"Project: {pages.project_name}")
    print(f"Total pages: {pages.count}")
    print()
    print("First 5 pages:")
    for page in pages.pages:
        print(f"  - {page.title} (views: {page.views})")

    print()
    print()

    # Get individual page details
    print("Get page details:")
    page_detail = client.get_page(PROJECT_NAME, PAGE_TITLE)
    print(f"Title: {page_detail.title}")
    print(f"Lines: {page_detail.lines_count}")
    print(f"Characters: {page_detail.chars_count}")
    print(f"First 5 lines:")
    for line in page_detail.lines[:5]:
        print(f"  {line.text}")

    print()
    print()

    # Get page text
    print("Page text:")
    text = client.get_page_text(PROJECT_NAME, PAGE_TITLE)
    print(text[:200] + "...")

    print()
    print()

    # Get icon URL
    print("Icon URL:")
    icon_url = client.get_page_icon_url(PROJECT_NAME, PAGE_TITLE)
    print(icon_url)

print()
print()

# Access private project with authentication
# connect.sid is obtained from browser cookies
print("=== Example with authentication ===")
connect_sid = "s%3AykQ__xxxxx-.xxxxxxxxxxxxxxxxxxxxx%2Bxxxxxxxxx%2Bxxxxxxxxxxx"
with ScrapboxClient(connect_sid=connect_sid) as client:
    try:
        pages = client.get_pages("your-private-pj", limit=3)
        print(f"Project: {pages.project_name}")
        for page in pages.pages:
            print(f"  - {page.title}")
    except Exception as e:
        print(f"Error: {e}")

Image

from scrapbox.client import ScrapboxClient

with ScrapboxClient() as client:
    # Get image by specifying file ID
    file_id = "1a2b3c4d5e6f7g8h9i0j.JPG"
    print(f"Fetching file: {file_id}")

    try:
        image_data = client.get_file(file_id)
        print(f"Successfully fetched: {len(image_data)} bytes")

        # Save to file
        output_path = "downloaded_image.jpg"
        with open(output_path, "wb") as f:
            f.write(image_data)
        print(f"Saved: {output_path}")

    except Exception as e:
        print(f"Error: {e}")

    print()

    # Can also fetch with full URL
    print("Fetch with full URL:")
    try:
        full_url = "https://gyazo.com/da78df293f9e83a74b5402411e2f2e01"
        image_data2 = client.get_file(full_url)
        print(f"Successfully fetched: {len(image_data2)} bytes")
    except Exception as e:
        print(f"Error: {e}")

License

MIT

 1""".. include:: ../README.md"""  # noqa: D415
 2
 3import importlib.metadata
 4
 5from .client import ScrapboxClient
 6from .models import GyazoOEmbedResponse, Line, PageDetail, PageListItem, PageListResponse, User
 7
 8try:
 9    __version__ = importlib.metadata.version(__name__)
10except importlib.metadata.PackageNotFoundError:
11    __version__ = "0.0.0"
12
13__all__ = (
14    "GyazoOEmbedResponse",
15    "Line",
16    "PageDetail",
17    "PageListItem",
18    "PageListResponse",
19    "ScrapboxClient",
20    "User",
21)
class GyazoOEmbedResponse(pydantic.main.BaseModel, typing.Generic[~RootModelRootType]):
172class GyazoOEmbedResponse(RootModel[GyazoOEmbedResponsePhoto | GyazoOEmbedResponseVideo]):
173    """Response from the Gyazo oEmbed API.
174
175    See: https://gyazo.com/api/docs/image#oembed
176    """
177
178    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

Response from the Gyazo oEmbed API.

See: https://gyazo.com/api/docs/image#oembed

class Line(pydantic.main.BaseModel):
55class Line(BaseModel):
56    """Line data in a page."""
57
58    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
59
60    id: str
61    text: str
62    user_id: str = Field(alias="userId")
63    created: int
64    updated: int

Line data in a page.

id: str = PydanticUndefined
text: str = PydanticUndefined
user_id: str = PydanticUndefined
created: int = PydanticUndefined
updated: int = PydanticUndefined
class PageDetail(pydantic.main.BaseModel):
67class PageDetail(BaseModel):
68    """Detailed information about a page."""
69
70    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
71
72    id: str
73    title: str
74    image: str | None = None
75    descriptions: list[str]
76    user: User
77    last_update_user: User = Field(alias="lastUpdateUser")
78    pin: int
79    views: int
80    linked: int
81    commit_id: str | None = Field(None, alias="commitId")
82    created: int
83    updated: int
84    accessed: int
85    snapshot_created: int | None = Field(None, alias="snapshotCreated")
86    page_rank: float = Field(alias="pageRank")
87    last_accessed: int | None = Field(None, alias="lastAccessed")
88    lines_count: int = Field(alias="linesCount")
89    chars_count: int = Field(alias="charsCount")
90    helpfeels: list[str]
91    persistent: bool
92    lines: list[Line]

Detailed information about a page.

id: str = PydanticUndefined
title: str = PydanticUndefined
image: str | None = None
descriptions: list[str] = PydanticUndefined
user: User = PydanticUndefined
last_update_user: User = PydanticUndefined
pin: int = PydanticUndefined
views: int = PydanticUndefined
linked: int = PydanticUndefined
commit_id: str | None = None
created: int = PydanticUndefined
updated: int = PydanticUndefined
accessed: int = PydanticUndefined
snapshot_created: int | None = None
page_rank: float = PydanticUndefined
last_accessed: int | None = None
lines_count: int = PydanticUndefined
chars_count: int = PydanticUndefined
helpfeels: list[str] = PydanticUndefined
persistent: bool = PydanticUndefined
lines: list[Line] = PydanticUndefined
class PageListItem(pydantic.main.BaseModel):
21class PageListItem(BaseModel):
22    """An item in the page list."""
23
24    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
25
26    id: str
27    title: str
28    image: str | None = None
29    descriptions: list[str]
30    user: User
31    last_update_user: User = Field(alias="lastUpdateUser")
32    pin: int
33    views: int
34    linked: int
35    created: int
36    updated: int
37    accessed: int
38    lines_count: int = Field(alias="linesCount")
39    chars_count: int = Field(alias="charsCount")
40    helpfeels: list[str]

An item in the page list.

id: str = PydanticUndefined
title: str = PydanticUndefined
image: str | None = None
descriptions: list[str] = PydanticUndefined
user: User = PydanticUndefined
last_update_user: User = PydanticUndefined
pin: int = PydanticUndefined
views: int = PydanticUndefined
linked: int = PydanticUndefined
created: int = PydanticUndefined
updated: int = PydanticUndefined
accessed: int = PydanticUndefined
lines_count: int = PydanticUndefined
chars_count: int = PydanticUndefined
helpfeels: list[str] = PydanticUndefined
class PageListResponse(pydantic.main.BaseModel):
43class PageListResponse(BaseModel):
44    """Response from the page list API."""
45
46    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
47
48    project_name: str = Field(alias="projectName")
49    skip: int
50    limit: int
51    count: int
52    pages: list[PageListItem]

Response from the page list API.

project_name: str = PydanticUndefined
skip: int = PydanticUndefined
limit: int = PydanticUndefined
count: int = PydanticUndefined
pages: list[PageListItem] = PydanticUndefined
class ScrapboxClient:
 15class ScrapboxClient:
 16    """Scrapbox API client.
 17
 18    This client provides methods to interact with the Scrapbox API,
 19    including retrieving page lists, page details, page text, and files.
 20    """
 21
 22    """Base URL for the Scrapbox API."""
 23    BASE_URL = "https://scrapbox.io/api"
 24
 25    def __init__(self, connect_sid: str | None = None) -> None:
 26        """Initialize the Scrapbox API client.
 27
 28        Args:
 29            connect_sid: Scrapbox authentication cookie (connect.sid).
 30        """
 31        self.connect_sid = connect_sid
 32        self.client = httpx.Client(
 33            cookies={"connect.sid": connect_sid} if connect_sid else None,
 34            follow_redirects=True,
 35        )
 36
 37    def __enter__(self: Self) -> Self:
 38        """Enter the runtime context related to this object."""
 39        return self
 40
 41    def __exit__(self, typ: type[BaseException] | None, exc: BaseException | None, tb: TracebackType | None, /) -> None:
 42        """Exit the runtime context related to this object."""
 43        self.client.close()
 44
 45    def close(self) -> None:
 46        """Close the HTTP client."""
 47        self.client.close()
 48
 49    def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
 50        """Get a list of pages from a project.
 51
 52        Args:
 53            project_name: The name of the project.
 54            skip: Number of pages to skip (default: 0).
 55            limit: Number of pages to retrieve (default: 100).
 56
 57        Returns:
 58            PageListResponse: The response containing the page list.
 59        """
 60        url = f"{self.BASE_URL}/pages/{project_name}"
 61        params = {"skip": skip, "limit": limit}
 62
 63        response = self.client.get(url, params=params)
 64        response.raise_for_status()
 65
 66        return PageListResponse.model_validate(response.json())
 67
 68    def get_page(self, project_name: str, page_title: str) -> PageDetail:
 69        """Get detailed information about a specific page.
 70
 71        Args:
 72            project_name: The name of the project.
 73            page_title: The title of the page.
 74
 75        Returns:
 76            PageDetail: The detailed information about the page.
 77        """
 78        encoded_title = quote(page_title, safe="")
 79        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}"
 80
 81        response = self.client.get(url)
 82        response.raise_for_status()
 83
 84        return PageDetail.model_validate(response.json())
 85
 86    def get_page_text(self, project_name: str, page_title: str) -> str:
 87        """Get the text content of a page.
 88
 89        Args:
 90            project_name: The name of the project.
 91            page_title: The title of the page.
 92
 93        Returns:
 94            str: The text content of the page.
 95        """
 96        encoded_title = quote(page_title, safe="")
 97        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/text"
 98
 99        response = self.client.get(url)
100        response.raise_for_status()
101
102        return response.text
103
104    def get_page_icon_url(self, project_name: str, page_title: str) -> str:
105        """Get the icon image URL for a page.
106
107        This method returns the redirect destination URL of the page icon.
108
109        Args:
110            project_name: The name of the project.
111            page_title: The title of the page.
112
113        Returns:
114            str: The URL of the icon image.
115        """
116        encoded_title = quote(page_title, safe="")
117        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/icon"
118
119        response = self.client.get(url, follow_redirects=False)
120
121        if response.status_code == httpx.codes.FOUND:
122            return response.headers.get("location", "")
123        if response.status_code == httpx.codes.OK:
124            return url
125        response.raise_for_status()
126        return url
127
128    def get_file(self, file_id: str) -> bytes:
129        """Get a file uploaded to Scrapbox.
130
131        Args:
132            file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG")
133                or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG"
134                or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
135
136        Returns:
137            bytes: The binary data of the file.
138        """
139        url = file_id if file_id.startswith(("http://", "https://")) else f"https://scrapbox.io/files/{file_id}"
140
141        parsed_url = urlparse(url)
142        if parsed_url.hostname and "gyazo.com" in parsed_url.hostname:
143            # If URL already has a file extension (e.g., .mp4, .jpg), directly convert to i.gyazo.com
144            path = parsed_url.path.strip("/")
145            if "." in path.split("/")[-1]:  # Check if last path segment has extension
146                url = f"https://i.gyazo.com/{path}"
147            else:
148                # Use oEmbed API to get the actual file URL
149                oembed_url = f"{self.BASE_URL}/oembed-proxy/gyazo"
150                response = self.client.get(oembed_url, params={"url": url})
151                response.raise_for_status()
152                json = response.json()
153                if (oembed_type := json.get("type")) not in ("photo", "video"):
154                    msg = f"Unsupported Gyazo oEmbed type: {oembed_type}"
155                    raise ValueError(msg)
156                oembed_data = GyazoOEmbedResponse.model_validate(json)
157                if isinstance(oembed_data.root, GyazoOEmbedResponsePhoto):
158                    url = oembed_data.root.url
159                else:  # video
160                    # Extract Gyazo ID from the original URL and construct direct video URL
161                    gyazo_id = parsed_url.path.strip("/")
162                    url = f"https://i.gyazo.com/{gyazo_id}.mp4"
163        response = self.client.get(url)
164        response.raise_for_status()
165
166        return response.content

Scrapbox API client.

This client provides methods to interact with the Scrapbox API, including retrieving page lists, page details, page text, and files.

ScrapboxClient(connect_sid: str | None = None)
25    def __init__(self, connect_sid: str | None = None) -> None:
26        """Initialize the Scrapbox API client.
27
28        Args:
29            connect_sid: Scrapbox authentication cookie (connect.sid).
30        """
31        self.connect_sid = connect_sid
32        self.client = httpx.Client(
33            cookies={"connect.sid": connect_sid} if connect_sid else None,
34            follow_redirects=True,
35        )

Initialize the Scrapbox API client.

Arguments:
  • connect_sid: Scrapbox authentication cookie (connect.sid).
BASE_URL = 'https://scrapbox.io/api'
connect_sid
client
def close(self) -> None:
45    def close(self) -> None:
46        """Close the HTTP client."""
47        self.client.close()

Close the HTTP client.

def get_pages( self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
49    def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
50        """Get a list of pages from a project.
51
52        Args:
53            project_name: The name of the project.
54            skip: Number of pages to skip (default: 0).
55            limit: Number of pages to retrieve (default: 100).
56
57        Returns:
58            PageListResponse: The response containing the page list.
59        """
60        url = f"{self.BASE_URL}/pages/{project_name}"
61        params = {"skip": skip, "limit": limit}
62
63        response = self.client.get(url, params=params)
64        response.raise_for_status()
65
66        return PageListResponse.model_validate(response.json())

Get a list of pages from a project.

Arguments:
  • project_name: The name of the project.
  • skip: Number of pages to skip (default: 0).
  • limit: Number of pages to retrieve (default: 100).
Returns:

PageListResponse: The response containing the page list.

def get_page(self, project_name: str, page_title: str) -> PageDetail:
68    def get_page(self, project_name: str, page_title: str) -> PageDetail:
69        """Get detailed information about a specific page.
70
71        Args:
72            project_name: The name of the project.
73            page_title: The title of the page.
74
75        Returns:
76            PageDetail: The detailed information about the page.
77        """
78        encoded_title = quote(page_title, safe="")
79        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}"
80
81        response = self.client.get(url)
82        response.raise_for_status()
83
84        return PageDetail.model_validate(response.json())

Get detailed information about a specific page.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

PageDetail: The detailed information about the page.

def get_page_text(self, project_name: str, page_title: str) -> str:
 86    def get_page_text(self, project_name: str, page_title: str) -> str:
 87        """Get the text content of a page.
 88
 89        Args:
 90            project_name: The name of the project.
 91            page_title: The title of the page.
 92
 93        Returns:
 94            str: The text content of the page.
 95        """
 96        encoded_title = quote(page_title, safe="")
 97        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/text"
 98
 99        response = self.client.get(url)
100        response.raise_for_status()
101
102        return response.text

Get the text content of a page.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

str: The text content of the page.

def get_page_icon_url(self, project_name: str, page_title: str) -> str:
104    def get_page_icon_url(self, project_name: str, page_title: str) -> str:
105        """Get the icon image URL for a page.
106
107        This method returns the redirect destination URL of the page icon.
108
109        Args:
110            project_name: The name of the project.
111            page_title: The title of the page.
112
113        Returns:
114            str: The URL of the icon image.
115        """
116        encoded_title = quote(page_title, safe="")
117        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/icon"
118
119        response = self.client.get(url, follow_redirects=False)
120
121        if response.status_code == httpx.codes.FOUND:
122            return response.headers.get("location", "")
123        if response.status_code == httpx.codes.OK:
124            return url
125        response.raise_for_status()
126        return url

Get the icon image URL for a page.

This method returns the redirect destination URL of the page icon.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

str: The URL of the icon image.

def get_file(self, file_id: str) -> bytes:
128    def get_file(self, file_id: str) -> bytes:
129        """Get a file uploaded to Scrapbox.
130
131        Args:
132            file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG")
133                or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG"
134                or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
135
136        Returns:
137            bytes: The binary data of the file.
138        """
139        url = file_id if file_id.startswith(("http://", "https://")) else f"https://scrapbox.io/files/{file_id}"
140
141        parsed_url = urlparse(url)
142        if parsed_url.hostname and "gyazo.com" in parsed_url.hostname:
143            # If URL already has a file extension (e.g., .mp4, .jpg), directly convert to i.gyazo.com
144            path = parsed_url.path.strip("/")
145            if "." in path.split("/")[-1]:  # Check if last path segment has extension
146                url = f"https://i.gyazo.com/{path}"
147            else:
148                # Use oEmbed API to get the actual file URL
149                oembed_url = f"{self.BASE_URL}/oembed-proxy/gyazo"
150                response = self.client.get(oembed_url, params={"url": url})
151                response.raise_for_status()
152                json = response.json()
153                if (oembed_type := json.get("type")) not in ("photo", "video"):
154                    msg = f"Unsupported Gyazo oEmbed type: {oembed_type}"
155                    raise ValueError(msg)
156                oembed_data = GyazoOEmbedResponse.model_validate(json)
157                if isinstance(oembed_data.root, GyazoOEmbedResponsePhoto):
158                    url = oembed_data.root.url
159                else:  # video
160                    # Extract Gyazo ID from the original URL and construct direct video URL
161                    gyazo_id = parsed_url.path.strip("/")
162                    url = f"https://i.gyazo.com/{gyazo_id}.mp4"
163        response = self.client.get(url)
164        response.raise_for_status()
165
166        return response.content

Get a file uploaded to Scrapbox.

Arguments:
  • file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG") or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG" or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
Returns:

bytes: The binary data of the file.

class User(pydantic.main.BaseModel):
10class User(BaseModel):
11    """User information."""
12
13    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
14
15    id: str
16    name: str | None = None
17    display_name: str | None = None
18    photo: str | None = None

User information.

id: str = PydanticUndefined
name: str | None = None
display_name: str | None = None
photo: str | None = None