scrapbox

scrapbox-client

PyPI version CI

Scrapbox (Helpfeel Cosense) Client

Install

pip install scrapbox-client

CLI

$ sbc
usage: sbc [-h] [--version] [--connect-sid CONNECT_SID | --connect-sid-file CONNECT_SID_FILE] {pages,all-pages,page,text,icon,file} ...

Scrapbox API client CLI

positional arguments:
  {pages,all-pages,page,text,icon,file}
                        Available commands
    pages               Get page list from a project
    all-pages           Get all pages from a project
    page                Get detailed information about a page
    text                Get text content of a page
    icon                Get icon URL for a page
    file                Download a file from Scrapbox

options:
  -h, --help            show this help message and exit
  --version, -V         Show program's version number and exit
  --connect-sid CONNECT_SID
                        Scrapbox authentication cookie (connect.sid)
  --connect-sid-file CONNECT_SID_FILE
                        Path to file containing connect.sid (default: ~/.config/sbc/connect.sid)

examples:
  sbc pages my-project --limit 10 --skip 10 --json
  sbc all-pages my-project --batch-size 500 --json
  sbc page my-project "Page Title" --json
  sbc text my-project "Page Title"
  sbc icon my-project "Page Title"
  sbc file 60190edf1176d9001c13f8e8.png --output image.png

Priority of `connect.sid` sources (highest priority first; the first one found is used):
  1. --connect-sid argument
  2. --connect-sid-file argument
  3. ~/.config/sbc/connect.sid file
  4. SBC_CONNECT_SID environment variable

Library

Overview

from scrapbox.client import ScrapboxClient

PROJECT_NAME = "help-jp"
PAGE_TITLE = "ブラケティング"

# Access public project without authentication
with ScrapboxClient() as client:
    # Get page list
    pages = client.get_pages(PROJECT_NAME, skip=0, limit=5)
    print(f"Project: {pages.project_name}")
    print(f"Total pages: {pages.count}")
    print()
    print("First 5 pages:")
    for page in pages.pages:
        print(f"  - {page.title} (views: {page.views})")

    print()
    print()

    # Get individual page details
    print("Get page details:")
    page_detail = client.get_page(PROJECT_NAME, PAGE_TITLE)
    print(f"Title: {page_detail.title}")
    print(f"Lines: {page_detail.lines_count}")
    print(f"Characters: {page_detail.chars_count}")
    print(f"First 5 lines:")
    for line in page_detail.lines[:5]:
        print(f"  {line.text}")

    print()
    print()

    # Get page text
    print("Page text:")
    text = client.get_page_text(PROJECT_NAME, PAGE_TITLE)
    print(text[:200] + "...")

    print()
    print()

    # Get icon URL
    print("Icon URL:")
    icon_url = client.get_page_icon_url(PROJECT_NAME, PAGE_TITLE)
    print(icon_url)

print()
print()

# Access private project with authentication
# connect.sid is obtained from browser cookies
print("=== Example with authentication ===")
connect_sid = "s%3AykQ__xxxxx-.xxxxxxxxxxxxxxxxxxxxx%2Bxxxxxxxxx%2Bxxxxxxxxxxx"
with ScrapboxClient(connect_sid=connect_sid) as client:
    try:
        pages = client.get_pages("your-private-pj", limit=3)
        print(f"Project: {pages.project_name}")
        for page in pages.pages:
            print(f"  - {page.title}")
    except Exception as e:
        print(f"Error: {e}")

Image

from scrapbox.client import ScrapboxClient

with ScrapboxClient() as client:
    # Get image by specifying file ID
    file_id = "1a2b3c4d5e6f7g8h9i0j.JPG"
    print(f"Fetching file: {file_id}")

    try:
        image_data = client.get_file(file_id)
        print(f"Successfully fetched: {len(image_data)} bytes")

        # Save to file
        output_path = "downloaded_image.jpg"
        with open(output_path, "wb") as f:
            f.write(image_data)
        print(f"Saved: {output_path}")

    except Exception as e:
        print(f"Error: {e}")

    print()

    # Can also fetch with full URL
    print("Fetch with full URL:")
    try:
        full_url = "https://gyazo.com/da78df293f9e83a74b5402411e2f2e01"
        image_data2 = client.get_file(full_url)
        print(f"Successfully fetched: {len(image_data2)} bytes")
    except Exception as e:
        print(f"Error: {e}")

License

MIT

 1""".. include:: ../README.md"""  # noqa: D415
 2
 3import importlib.metadata
 4
 5from .client import ScrapboxClient
 6from .models import GyazoOEmbedResponse, Line, PageDetail, PageListItem, PageListResponse, User
 7
 8try:
 9    __version__ = importlib.metadata.version(__name__)
10except importlib.metadata.PackageNotFoundError:
11    __version__ = "0.0.0"
12
13__all__ = (
14    "GyazoOEmbedResponse",
15    "Line",
16    "PageDetail",
17    "PageListItem",
18    "PageListResponse",
19    "ScrapboxClient",
20    "User",
21)
class GyazoOEmbedResponse(pydantic.main.BaseModel, typing.Generic[~RootModelRootType]):
172class GyazoOEmbedResponse(RootModel[GyazoOEmbedResponsePhoto | GyazoOEmbedResponseVideo]):
173    """Response from the Gyazo oEmbed API.
174
175    See: https://gyazo.com/api/docs/image#oembed
176    """
177
178    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

Response from the Gyazo oEmbed API.

See: https://gyazo.com/api/docs/image#oembed

class Line(pydantic.main.BaseModel):
55class Line(BaseModel):
56    """Line data in a page."""
57
58    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
59
60    id: str
61    text: str
62    user_id: str = Field(alias="userId")
63    created: int
64    updated: int

Line data in a page.

id: str = PydanticUndefined
text: str = PydanticUndefined
user_id: str = PydanticUndefined
created: int = PydanticUndefined
updated: int = PydanticUndefined
class PageDetail(pydantic.main.BaseModel):
67class PageDetail(BaseModel):
68    """Detailed information about a page."""
69
70    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
71
72    id: str
73    title: str
74    image: str | None = None
75    descriptions: list[str]
76    user: User
77    last_update_user: User = Field(alias="lastUpdateUser")
78    pin: int
79    views: int
80    linked: int
81    commit_id: str | None = Field(None, alias="commitId")
82    created: int
83    updated: int
84    accessed: int
85    snapshot_created: int | None = Field(None, alias="snapshotCreated")
86    page_rank: float = Field(alias="pageRank")
87    last_accessed: int | None = Field(None, alias="lastAccessed")
88    lines_count: int = Field(alias="linesCount")
89    chars_count: int = Field(alias="charsCount")
90    helpfeels: list[str]
91    persistent: bool
92    lines: list[Line]

Detailed information about a page.

id: str = PydanticUndefined
title: str = PydanticUndefined
image: str | None = None
descriptions: list[str] = PydanticUndefined
user: User = PydanticUndefined
last_update_user: User = PydanticUndefined
pin: int = PydanticUndefined
views: int = PydanticUndefined
linked: int = PydanticUndefined
commit_id: str | None = None
created: int = PydanticUndefined
updated: int = PydanticUndefined
accessed: int = PydanticUndefined
snapshot_created: int | None = None
page_rank: float = PydanticUndefined
last_accessed: int | None = None
lines_count: int = PydanticUndefined
chars_count: int = PydanticUndefined
helpfeels: list[str] = PydanticUndefined
persistent: bool = PydanticUndefined
lines: list[Line] = PydanticUndefined
class PageListItem(pydantic.main.BaseModel):
21class PageListItem(BaseModel):
22    """An item in the page list."""
23
24    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
25
26    id: str
27    title: str
28    image: str | None = None
29    descriptions: list[str]
30    user: User
31    last_update_user: User = Field(alias="lastUpdateUser")
32    pin: int
33    views: int
34    linked: int
35    created: int
36    updated: int
37    accessed: int
38    lines_count: int = Field(alias="linesCount")
39    chars_count: int = Field(alias="charsCount")
40    helpfeels: list[str]

An item in the page list.

id: str = PydanticUndefined
title: str = PydanticUndefined
image: str | None = None
descriptions: list[str] = PydanticUndefined
user: User = PydanticUndefined
last_update_user: User = PydanticUndefined
pin: int = PydanticUndefined
views: int = PydanticUndefined
linked: int = PydanticUndefined
created: int = PydanticUndefined
updated: int = PydanticUndefined
accessed: int = PydanticUndefined
lines_count: int = PydanticUndefined
chars_count: int = PydanticUndefined
helpfeels: list[str] = PydanticUndefined
class PageListResponse(pydantic.main.BaseModel):
43class PageListResponse(BaseModel):
44    """Response from the page list API."""
45
46    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
47
48    project_name: str = Field(alias="projectName")
49    skip: int
50    limit: int
51    count: int
52    pages: list[PageListItem]

Response from the page list API.

project_name: str = PydanticUndefined
skip: int = PydanticUndefined
limit: int = PydanticUndefined
count: int = PydanticUndefined
pages: list[PageListItem] = PydanticUndefined
class ScrapboxClient:
 15class ScrapboxClient:
 16    """Scrapbox API client.
 17
 18    This client provides methods to interact with the Scrapbox API,
 19    including retrieving page lists, page details, page text, and files.
 20    """
 21
 22    """Base URL for the Scrapbox API."""
 23    BASE_URL = "https://scrapbox.io/api"
 24
 25    def __init__(self, connect_sid: str | None = None) -> None:
 26        """Initialize the Scrapbox API client.
 27
 28        Args:
 29            connect_sid: Scrapbox authentication cookie (connect.sid).
 30        """
 31        self.connect_sid = connect_sid
 32        self.client = httpx.Client(
 33            cookies={"connect.sid": connect_sid} if connect_sid else None,
 34            follow_redirects=True,
 35        )
 36
 37    def __enter__(self: Self) -> Self:
 38        """Enter the runtime context related to this object."""
 39        return self
 40
 41    def __exit__(self, typ: type[BaseException] | None, exc: BaseException | None, tb: TracebackType | None, /) -> None:
 42        """Exit the runtime context related to this object."""
 43        self.client.close()
 44
 45    def close(self) -> None:
 46        """Close the HTTP client."""
 47        self.client.close()
 48
 49    def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
 50        """Get a list of pages from a project.
 51
 52        Args:
 53            project_name: The name of the project.
 54            skip: Number of pages to skip (default: 0).
 55            limit: Number of pages to retrieve (default: 100).
 56
 57        Returns:
 58            PageListResponse: The response containing the page list.
 59        """
 60        url = f"{self.BASE_URL}/pages/{project_name}"
 61        params = {"skip": skip, "limit": limit}
 62
 63        response = self.client.get(url, params=params)
 64        response.raise_for_status()
 65
 66        return PageListResponse.model_validate(response.json())
 67
 68    def get_page(self, project_name: str, page_title: str) -> PageDetail:
 69        """Get detailed information about a specific page.
 70
 71        Args:
 72            project_name: The name of the project.
 73            page_title: The title of the page.
 74
 75        Returns:
 76            PageDetail: The detailed information about the page.
 77        """
 78        encoded_title = quote(page_title, safe="")
 79        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}"
 80
 81        response = self.client.get(url)
 82        response.raise_for_status()
 83
 84        return PageDetail.model_validate(response.json())
 85
 86    def get_page_text(self, project_name: str, page_title: str) -> str:
 87        """Get the text content of a page.
 88
 89        Args:
 90            project_name: The name of the project.
 91            page_title: The title of the page.
 92
 93        Returns:
 94            str: The text content of the page.
 95        """
 96        encoded_title = quote(page_title, safe="")
 97        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/text"
 98
 99        response = self.client.get(url)
100        response.raise_for_status()
101
102        return response.text
103
104    def get_page_icon_url(self, project_name: str, page_title: str) -> str:
105        """Get the icon image URL for a page.
106
107        This method returns the redirect destination URL of the page icon.
108
109        Args:
110            project_name: The name of the project.
111            page_title: The title of the page.
112
113        Returns:
114            str: The URL of the icon image.
115        """
116        encoded_title = quote(page_title, safe="")
117        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/icon"
118
119        response = self.client.get(url, follow_redirects=False)
120
121        if response.status_code == httpx.codes.FOUND:
122            return response.headers.get("location", "")
123        if response.status_code == httpx.codes.OK:
124            return url
125        response.raise_for_status()
126        return url
127
128    def get_file(self, file_id: str) -> bytes:
129        """Get a file uploaded to Scrapbox.
130
131        Args:
132            file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG")
133                or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG"
134                or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
135
136        Returns:
137            bytes: The binary data of the file.
138        """
139        url = file_id if file_id.startswith(("http://", "https://")) else f"https://scrapbox.io/files/{file_id}"
140
141        parsed_url = urlparse(url)
142        if parsed_url.hostname and "gyazo.com" in parsed_url.hostname:
143            # If URL already has a file extension (e.g., .mp4, .jpg), directly convert to i.gyazo.com
144            path = parsed_url.path.strip("/")
145            if "." in path.split("/")[-1]:  # Check if last path segment has extension
146                url = f"https://i.gyazo.com/{path}"
147            else:
148                # Use oEmbed API to get the actual file URL
149                oembed_url = f"{self.BASE_URL}/oembed-proxy/gyazo"
150                response = self.client.get(oembed_url, params={"url": url})
151                response.raise_for_status()
152                json = response.json()
153                if (oembed_type := json.get("type")) not in ("photo", "video"):
154                    msg = f"Unsupported Gyazo oEmbed type: {oembed_type}"
155                    raise ValueError(msg)
156                oembed_data = GyazoOEmbedResponse.model_validate(json)
157                if isinstance(oembed_data.root, GyazoOEmbedResponsePhoto):
158                    url = oembed_data.root.url
159                else:  # video
160                    # Extract Gyazo ID from the original URL and construct direct video URL
161                    gyazo_id = parsed_url.path.strip("/")
162                    url = f"https://i.gyazo.com/{gyazo_id}.mp4"
163        response = self.client.get(url)
164        response.raise_for_status()
165
166        return response.content

Scrapbox API client.

This client provides methods to interact with the Scrapbox API, including retrieving page lists, page details, page text, and files.

ScrapboxClient(connect_sid: str | None = None)
25    def __init__(self, connect_sid: str | None = None) -> None:
26        """Initialize the Scrapbox API client.
27
28        Args:
29            connect_sid: Scrapbox authentication cookie (connect.sid).
30        """
31        self.connect_sid = connect_sid
32        self.client = httpx.Client(
33            cookies={"connect.sid": connect_sid} if connect_sid else None,
34            follow_redirects=True,
35        )

Initialize the Scrapbox API client.

Arguments:
  • connect_sid: Scrapbox authentication cookie (connect.sid).
BASE_URL = 'https://scrapbox.io/api'
connect_sid
client
def close(self) -> None:
45    def close(self) -> None:
46        """Close the HTTP client."""
47        self.client.close()

Close the HTTP client.

def get_pages( self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
49    def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
50        """Get a list of pages from a project.
51
52        Args:
53            project_name: The name of the project.
54            skip: Number of pages to skip (default: 0).
55            limit: Number of pages to retrieve (default: 100).
56
57        Returns:
58            PageListResponse: The response containing the page list.
59        """
60        url = f"{self.BASE_URL}/pages/{project_name}"
61        params = {"skip": skip, "limit": limit}
62
63        response = self.client.get(url, params=params)
64        response.raise_for_status()
65
66        return PageListResponse.model_validate(response.json())

Get a list of pages from a project.

Arguments:
  • project_name: The name of the project.
  • skip: Number of pages to skip (default: 0).
  • limit: Number of pages to retrieve (default: 100).
Returns:

PageListResponse: The response containing the page list.

def get_page(self, project_name: str, page_title: str) -> PageDetail:
68    def get_page(self, project_name: str, page_title: str) -> PageDetail:
69        """Get detailed information about a specific page.
70
71        Args:
72            project_name: The name of the project.
73            page_title: The title of the page.
74
75        Returns:
76            PageDetail: The detailed information about the page.
77        """
78        encoded_title = quote(page_title, safe="")
79        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}"
80
81        response = self.client.get(url)
82        response.raise_for_status()
83
84        return PageDetail.model_validate(response.json())

Get detailed information about a specific page.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

PageDetail: The detailed information about the page.

def get_page_text(self, project_name: str, page_title: str) -> str:
 86    def get_page_text(self, project_name: str, page_title: str) -> str:
 87        """Get the text content of a page.
 88
 89        Args:
 90            project_name: The name of the project.
 91            page_title: The title of the page.
 92
 93        Returns:
 94            str: The text content of the page.
 95        """
 96        encoded_title = quote(page_title, safe="")
 97        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/text"
 98
 99        response = self.client.get(url)
100        response.raise_for_status()
101
102        return response.text

Get the text content of a page.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

str: The text content of the page.

def get_page_icon_url(self, project_name: str, page_title: str) -> str:
104    def get_page_icon_url(self, project_name: str, page_title: str) -> str:
105        """Get the icon image URL for a page.
106
107        This method returns the redirect destination URL of the page icon.
108
109        Args:
110            project_name: The name of the project.
111            page_title: The title of the page.
112
113        Returns:
114            str: The URL of the icon image.
115        """
116        encoded_title = quote(page_title, safe="")
117        url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/icon"
118
119        response = self.client.get(url, follow_redirects=False)
120
121        if response.status_code == httpx.codes.FOUND:
122            return response.headers.get("location", "")
123        if response.status_code == httpx.codes.OK:
124            return url
125        response.raise_for_status()
126        return url

Get the icon image URL for a page.

This method returns the redirect destination URL of the page icon.

Arguments:
  • project_name: The name of the project.
  • page_title: The title of the page.
Returns:

str: The URL of the icon image.

def get_file(self, file_id: str) -> bytes:
128    def get_file(self, file_id: str) -> bytes:
129        """Get a file uploaded to Scrapbox.
130
131        Args:
132            file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG")
133                or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG"
134                or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
135
136        Returns:
137            bytes: The binary data of the file.
138        """
139        url = file_id if file_id.startswith(("http://", "https://")) else f"https://scrapbox.io/files/{file_id}"
140
141        parsed_url = urlparse(url)
142        if parsed_url.hostname and "gyazo.com" in parsed_url.hostname:
143            # If URL already has a file extension (e.g., .mp4, .jpg), directly convert to i.gyazo.com
144            path = parsed_url.path.strip("/")
145            if "." in path.split("/")[-1]:  # Check if last path segment has extension
146                url = f"https://i.gyazo.com/{path}"
147            else:
148                # Use oEmbed API to get the actual file URL
149                oembed_url = f"{self.BASE_URL}/oembed-proxy/gyazo"
150                response = self.client.get(oembed_url, params={"url": url})
151                response.raise_for_status()
152                json = response.json()
153                if (oembed_type := json.get("type")) not in ("photo", "video"):
154                    msg = f"Unsupported Gyazo oEmbed type: {oembed_type}"
155                    raise ValueError(msg)
156                oembed_data = GyazoOEmbedResponse.model_validate(json)
157                if isinstance(oembed_data.root, GyazoOEmbedResponsePhoto):
158                    url = oembed_data.root.url
159                else:  # video
160                    # Extract Gyazo ID from the original URL and construct direct video URL
161                    gyazo_id = parsed_url.path.strip("/")
162                    url = f"https://i.gyazo.com/{gyazo_id}.mp4"
163        response = self.client.get(url)
164        response.raise_for_status()
165
166        return response.content

Get a file uploaded to Scrapbox.

Arguments:
  • file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG") or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG" or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
Returns:

bytes: The binary data of the file.

class User(pydantic.main.BaseModel):
10class User(BaseModel):
11    """User information."""
12
13    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
14
15    id: str
16    name: str | None = None
17    display_name: str | None = None
18    photo: str | None = None

User information.

id: str = PydanticUndefined
name: str | None = None
display_name: str | None = None
photo: str | None = None