scrapbox
scrapbox-client
Scrapbox (Helpfeel Cosense) Client
Install
pip install scrapbox-client
CLI
$ sbc
usage: sbc [-h] [--version] [--connect-sid CONNECT_SID | --connect-sid-file CONNECT_SID_FILE] {pages,all-pages,page,text,icon,file} ...
Scrapbox API client CLI
positional arguments:
{pages,all-pages,page,text,icon,file}
Available commands
pages Get page list from a project
all-pages Get all pages from a project
page Get detailed information about a page
text Get text content of a page
icon Get icon URL for a page
file Download a file from Scrapbox
options:
-h, --help show this help message and exit
--version, -V Show program's version number and exit
--connect-sid CONNECT_SID
Scrapbox authentication cookie (connect.sid)
--connect-sid-file CONNECT_SID_FILE
Path to file containing connect.sid (default: ~/.config/sbc/connect.sid)
examples:
sbc pages my-project --limit 10 --skip 10 --json
sbc all-pages my-project --batch-size 500 --json
sbc page my-project "Page Title" --json
sbc text my-project "Page Title"
sbc icon my-project "Page Title"
sbc file 60190edf1176d9001c13f8e8.png --output image.png
Priority of `connect.sid` sources (first match wins):
1. --connect-sid argument
2. --connect-sid-file argument
3. ~/.config/sbc/connect.sid file
4. SBC_CONNECT_SID environment variable
Library
Overview
"""Example: browse a public Scrapbox project and read a single page.

Demonstrates ScrapboxClient usage: page listing, page details, raw page
text, the icon URL, and (optionally) authenticated access to a private
project via the connect.sid cookie.
"""

from scrapbox.client import ScrapboxClient

PROJECT_NAME = "help-jp"
PAGE_TITLE = "ブラケティング"

# Access public project without authentication
with ScrapboxClient() as client:
    # Get page list
    pages = client.get_pages(PROJECT_NAME, skip=0, limit=5)
    print(f"Project: {pages.project_name}")
    print(f"Total pages: {pages.count}")
    print()
    print("First 5 pages:")
    for page in pages.pages:
        print(f" - {page.title} (views: {page.views})")
    print()
    print()

    # Get individual page details
    print("Get page details:")
    page_detail = client.get_page(PROJECT_NAME, PAGE_TITLE)
    print(f"Title: {page_detail.title}")
    print(f"Lines: {page_detail.lines_count}")
    print(f"Characters: {page_detail.chars_count}")
    # Plain string: the original used an f-string with no placeholders.
    print("First 5 lines:")
    for line in page_detail.lines[:5]:
        print(f" {line.text}")
    print()
    print()

    # Get page text
    print("Page text:")
    text = client.get_page_text(PROJECT_NAME, PAGE_TITLE)
    print(text[:200] + "...")
    print()
    print()

    # Get icon URL
    print("Icon URL:")
    icon_url = client.get_page_icon_url(PROJECT_NAME, PAGE_TITLE)
    print(icon_url)
    print()
    print()

# Access private project with authentication
# connect.sid is obtained from browser cookies
print("=== Example with authentication ===")
connect_sid = "s%3AykQ__xxxxx-.xxxxxxxxxxxxxxxxxxxxx%2Bxxxxxxxxx%2Bxxxxxxxxxxx"
with ScrapboxClient(connect_sid=connect_sid) as client:
    try:
        pages = client.get_pages("your-private-pj", limit=3)
        print(f"Project: {pages.project_name}")
        for page in pages.pages:
            print(f" - {page.title}")
    except Exception as e:  # demo code: report any failure and keep going
        print(f"Error: {e}")
Image
"""Example: download files from Scrapbox, by file ID or by full URL."""

from scrapbox.client import ScrapboxClient

with ScrapboxClient() as client:
    # Get image by specifying file ID
    file_id = "1a2b3c4d5e6f7g8h9i0j.JPG"
    print(f"Fetching file: {file_id}")
    try:
        image_data = client.get_file(file_id)
        print(f"Successfully fetched: {len(image_data)} bytes")
        # Save to file
        output_path = "downloaded_image.jpg"
        with open(output_path, "wb") as f:
            f.write(image_data)
        print(f"Saved: {output_path}")
    except Exception as e:  # demo code: best-effort, just report
        print(f"Error: {e}")
    print()

    # Can also fetch with full URL
    print("Fetch with full URL:")
    try:
        full_url = "https://gyazo.com/da78df293f9e83a74b5402411e2f2e01"
        image_data2 = client.get_file(full_url)
        print(f"Successfully fetched: {len(image_data2)} bytes")
    except Exception as e:
        print(f"Error: {e}")
License
MIT
1""".. include:: ../README.md""" # noqa: D415 2 3import importlib.metadata 4 5from .client import ScrapboxClient 6from .models import GyazoOEmbedResponse, Line, PageDetail, PageListItem, PageListResponse, User 7 8try: 9 __version__ = importlib.metadata.version(__name__) 10except importlib.metadata.PackageNotFoundError: 11 __version__ = "0.0.0" 12 13__all__ = ( 14 "GyazoOEmbedResponse", 15 "Line", 16 "PageDetail", 17 "PageListItem", 18 "PageListResponse", 19 "ScrapboxClient", 20 "User", 21)
class GyazoOEmbedResponse(RootModel[GyazoOEmbedResponsePhoto | GyazoOEmbedResponseVideo]):
    """Response from the Gyazo oEmbed API.

    A root model wrapping either a photo or a video payload; the concrete
    variant is available as ``self.root``.

    See: https://gyazo.com/api/docs/image#oembed
    """

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)
Response from the Gyazo oEmbed API.
class Line(BaseModel):
    """A single line of text within a Scrapbox page."""

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

    id: str
    text: str
    # API sends camelCase keys; explicit alias kept for clarity.
    user_id: str = Field(alias="userId")
    created: int
    updated: int
Line data in a page.
class PageDetail(BaseModel):
    """Detailed information about a page, including its full line content."""

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

    id: str
    title: str
    image: str | None = None
    descriptions: list[str]
    user: User
    last_update_user: User = Field(alias="lastUpdateUser")
    pin: int
    views: int
    linked: int
    # Optional fields: the API may omit these, hence the None defaults.
    commit_id: str | None = Field(None, alias="commitId")
    created: int
    updated: int
    accessed: int
    snapshot_created: int | None = Field(None, alias="snapshotCreated")
    page_rank: float = Field(alias="pageRank")
    last_accessed: int | None = Field(None, alias="lastAccessed")
    lines_count: int = Field(alias="linesCount")
    chars_count: int = Field(alias="charsCount")
    helpfeels: list[str]
    persistent: bool
    lines: list[Line]
Detailed information about a page.
class PageListItem(BaseModel):
    """A single entry in a project's page list (summary, without lines)."""

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

    id: str
    title: str
    image: str | None = None
    descriptions: list[str]
    user: User
    last_update_user: User = Field(alias="lastUpdateUser")
    pin: int
    views: int
    linked: int
    created: int
    updated: int
    accessed: int
    lines_count: int = Field(alias="linesCount")
    chars_count: int = Field(alias="charsCount")
    helpfeels: list[str]
An item in the page list.
class PageListResponse(BaseModel):
    """Response from the page list API: paging info plus the page items."""

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

    project_name: str = Field(alias="projectName")
    skip: int
    limit: int
    count: int
    pages: list[PageListItem]
Response from the page list API.
15class ScrapboxClient: 16 """Scrapbox API client. 17 18 This client provides methods to interact with the Scrapbox API, 19 including retrieving page lists, page details, page text, and files. 20 """ 21 22 """Base URL for the Scrapbox API.""" 23 BASE_URL = "https://scrapbox.io/api" 24 25 def __init__(self, connect_sid: str | None = None) -> None: 26 """Initialize the Scrapbox API client. 27 28 Args: 29 connect_sid: Scrapbox authentication cookie (connect.sid). 30 """ 31 self.connect_sid = connect_sid 32 self.client = httpx.Client( 33 cookies={"connect.sid": connect_sid} if connect_sid else None, 34 follow_redirects=True, 35 ) 36 37 def __enter__(self: Self) -> Self: 38 """Enter the runtime context related to this object.""" 39 return self 40 41 def __exit__(self, typ: type[BaseException] | None, exc: BaseException | None, tb: TracebackType | None, /) -> None: 42 """Exit the runtime context related to this object.""" 43 self.client.close() 44 45 def close(self) -> None: 46 """Close the HTTP client.""" 47 self.client.close() 48 49 def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse: 50 """Get a list of pages from a project. 51 52 Args: 53 project_name: The name of the project. 54 skip: Number of pages to skip (default: 0). 55 limit: Number of pages to retrieve (default: 100). 56 57 Returns: 58 PageListResponse: The response containing the page list. 59 """ 60 url = f"{self.BASE_URL}/pages/{project_name}" 61 params = {"skip": skip, "limit": limit} 62 63 response = self.client.get(url, params=params) 64 response.raise_for_status() 65 66 return PageListResponse.model_validate(response.json()) 67 68 def get_page(self, project_name: str, page_title: str) -> PageDetail: 69 """Get detailed information about a specific page. 70 71 Args: 72 project_name: The name of the project. 73 page_title: The title of the page. 74 75 Returns: 76 PageDetail: The detailed information about the page. 
77 """ 78 encoded_title = quote(page_title, safe="") 79 url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}" 80 81 response = self.client.get(url) 82 response.raise_for_status() 83 84 return PageDetail.model_validate(response.json()) 85 86 def get_page_text(self, project_name: str, page_title: str) -> str: 87 """Get the text content of a page. 88 89 Args: 90 project_name: The name of the project. 91 page_title: The title of the page. 92 93 Returns: 94 str: The text content of the page. 95 """ 96 encoded_title = quote(page_title, safe="") 97 url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/text" 98 99 response = self.client.get(url) 100 response.raise_for_status() 101 102 return response.text 103 104 def get_page_icon_url(self, project_name: str, page_title: str) -> str: 105 """Get the icon image URL for a page. 106 107 This method returns the redirect destination URL of the page icon. 108 109 Args: 110 project_name: The name of the project. 111 page_title: The title of the page. 112 113 Returns: 114 str: The URL of the icon image. 115 """ 116 encoded_title = quote(page_title, safe="") 117 url = f"{self.BASE_URL}/pages/{project_name}/{encoded_title}/icon" 118 119 response = self.client.get(url, follow_redirects=False) 120 121 if response.status_code == httpx.codes.FOUND: 122 return response.headers.get("location", "") 123 if response.status_code == httpx.codes.OK: 124 return url 125 response.raise_for_status() 126 return url 127 128 def get_file(self, file_id: str) -> bytes: 129 """Get a file uploaded to Scrapbox. 130 131 Args: 132 file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG") 133 or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG" 134 or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f"). 135 136 Returns: 137 bytes: The binary data of the file. 
138 """ 139 url = file_id if file_id.startswith(("http://", "https://")) else f"https://scrapbox.io/files/{file_id}" 140 141 parsed_url = urlparse(url) 142 if parsed_url.hostname and "gyazo.com" in parsed_url.hostname: 143 # If URL already has a file extension (e.g., .mp4, .jpg), directly convert to i.gyazo.com 144 path = parsed_url.path.strip("/") 145 if "." in path.split("/")[-1]: # Check if last path segment has extension 146 url = f"https://i.gyazo.com/{path}" 147 else: 148 # Use oEmbed API to get the actual file URL 149 oembed_url = f"{self.BASE_URL}/oembed-proxy/gyazo" 150 response = self.client.get(oembed_url, params={"url": url}) 151 response.raise_for_status() 152 json = response.json() 153 if (oembed_type := json.get("type")) not in ("photo", "video"): 154 msg = f"Unsupported Gyazo oEmbed type: {oembed_type}" 155 raise ValueError(msg) 156 oembed_data = GyazoOEmbedResponse.model_validate(json) 157 if isinstance(oembed_data.root, GyazoOEmbedResponsePhoto): 158 url = oembed_data.root.url 159 else: # video 160 # Extract Gyazo ID from the original URL and construct direct video URL 161 gyazo_id = parsed_url.path.strip("/") 162 url = f"https://i.gyazo.com/{gyazo_id}.mp4" 163 response = self.client.get(url) 164 response.raise_for_status() 165 166 return response.content
Scrapbox API client.
This client provides methods to interact with the Scrapbox API, including retrieving page lists, page details, page text, and files.
def __init__(self, connect_sid: str | None = None) -> None:
    """Initialize the Scrapbox API client.

    Args:
        connect_sid: Scrapbox authentication cookie (connect.sid).
            When omitted, no cookie is sent (public access only).
    """
    self.connect_sid = connect_sid
    cookies = {"connect.sid": connect_sid} if connect_sid else None
    self.client = httpx.Client(cookies=cookies, follow_redirects=True)
Initialize the Scrapbox API client.
Arguments:
- connect_sid: Scrapbox authentication cookie (connect.sid).
def get_pages(self, project_name: str, skip: int = 0, limit: int = 100) -> PageListResponse:
    """Get a list of pages from a project.

    Args:
        project_name: The name of the project.
        skip: Number of pages to skip (default: 0).
        limit: Number of pages to retrieve (default: 100).

    Returns:
        PageListResponse: The response containing the page list.
    """
    response = self.client.get(
        f"{self.BASE_URL}/pages/{project_name}",
        params={"skip": skip, "limit": limit},
    )
    response.raise_for_status()
    return PageListResponse.model_validate(response.json())
Get a list of pages from a project.
Arguments:
- project_name: The name of the project.
- skip: Number of pages to skip (default: 0).
- limit: Number of pages to retrieve (default: 100).
Returns:
PageListResponse: The response containing the page list.
def get_page(self, project_name: str, page_title: str) -> PageDetail:
    """Get detailed information about a specific page.

    Args:
        project_name: The name of the project.
        page_title: The title of the page.

    Returns:
        PageDetail: The detailed information about the page.
    """
    # safe="" also percent-encodes '/', keeping the title a single path segment.
    title = quote(page_title, safe="")
    response = self.client.get(f"{self.BASE_URL}/pages/{project_name}/{title}")
    response.raise_for_status()
    return PageDetail.model_validate(response.json())
Get detailed information about a specific page.
Arguments:
- project_name: The name of the project.
- page_title: The title of the page.
Returns:
PageDetail: The detailed information about the page.
def get_page_text(self, project_name: str, page_title: str) -> str:
    """Get the text content of a page.

    Args:
        project_name: The name of the project.
        page_title: The title of the page.

    Returns:
        str: The text content of the page.
    """
    title = quote(page_title, safe="")
    response = self.client.get(f"{self.BASE_URL}/pages/{project_name}/{title}/text")
    response.raise_for_status()
    return response.text
Get the text content of a page.
Arguments:
- project_name: The name of the project.
- page_title: The title of the page.
Returns:
str: The text content of the page.
def get_page_icon_url(self, project_name: str, page_title: str) -> str:
    """Get the icon image URL for a page.

    This method returns the redirect destination URL of the page icon.

    Args:
        project_name: The name of the project.
        page_title: The title of the page.

    Returns:
        str: The URL of the icon image.
    """
    title = quote(page_title, safe="")
    icon_endpoint = f"{self.BASE_URL}/pages/{project_name}/{title}/icon"

    # Redirects are handled manually so the Location header can be read.
    response = self.client.get(icon_endpoint, follow_redirects=False)

    if response.status_code == httpx.codes.FOUND:
        # 302: report the redirect target.
        return response.headers.get("location", "")
    if response.status_code == httpx.codes.OK:
        # 200: the endpoint itself serves the image.
        return icon_endpoint
    response.raise_for_status()
    return icon_endpoint
Get the icon image URL for a page.
This method returns the redirect destination URL of the page icon.
Arguments:
- project_name: The name of the project.
- page_title: The title of the page.
Returns:
str: The URL of the icon image.
def get_file(self, file_id: str) -> bytes:
    """Get a file uploaded to Scrapbox.

    Args:
        file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG")
            or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG"
            or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").

    Returns:
        bytes: The binary data of the file.

    Raises:
        ValueError: If a Gyazo oEmbed lookup returns an unsupported type.
    """
    if file_id.startswith(("http://", "https://")):
        url = file_id
    else:
        url = f"https://scrapbox.io/files/{file_id}"

    parsed = urlparse(url)
    if parsed.hostname and "gyazo.com" in parsed.hostname:
        path = parsed.path.strip("/")
        if "." in path.split("/")[-1]:
            # URL already names a concrete file (has an extension):
            # fetch it directly from i.gyazo.com.
            url = f"https://i.gyazo.com/{path}"
        else:
            # Bare Gyazo page URL: resolve the real media URL via oEmbed.
            oembed_response = self.client.get(f"{self.BASE_URL}/oembed-proxy/gyazo", params={"url": url})
            oembed_response.raise_for_status()
            payload = oembed_response.json()
            if (oembed_type := payload.get("type")) not in ("photo", "video"):
                msg = f"Unsupported Gyazo oEmbed type: {oembed_type}"
                raise ValueError(msg)
            oembed = GyazoOEmbedResponse.model_validate(payload)
            if isinstance(oembed.root, GyazoOEmbedResponsePhoto):
                url = oembed.root.url
            else:
                # Video: build the direct mp4 URL from the Gyazo ID.
                url = f"https://i.gyazo.com/{parsed.path.strip('/')}.mp4"

    response = self.client.get(url)
    response.raise_for_status()
    return response.content
Get a file uploaded to Scrapbox.
Arguments:
- file_id: The file ID (e.g., "1a2b3c4d5e6f7g8h9i0j.JPG") or full URL (e.g., "https://scrapbox.io/files/1a2b3c4d5e6f7g8h9i0j.JPG" or "https://gyazo.com/1a2b3c4d5e6f7g8h9i0j1a2b3c4d5e6f").
Returns:
bytes: The binary data of the file.
class User(BaseModel):
    """User information as embedded in page responses."""

    model_config = ConfigDict(alias_generator=to_camel, from_attributes=True, populate_by_name=True)

    id: str
    # Optional profile fields; default to None when absent from the payload.
    name: str | None = None
    display_name: str | None = None
    photo: str | None = None
User information.