diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..c86e993
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,404 @@
+import { BskyAgent } from "@atproto/api"
+import * as dotenv from "dotenv"
+import { buildRichText } from "./util"
+import * as io from "./io"
+import type { Post } from "./post"
+import axios from 'axios'
+import { getUserTweets } from "./twitter"
+import { compressVideo } from "./video"
+import * as fs from 'fs'
+
+dotenv.config()
+
+if (!fs.existsSync('./temp')) fs.mkdirSync('./temp')
+
+const BSKY_USERNAME = process.env.BSKY_USERNAME!
+const BSKY_PASSWORD = process.env.BSKY_PASSWORD!
+const TWITTER_USER = process.env.TWITTER_USER!
+// Polling period (ms) used when CHECK_INTERVAL is unset or not a positive integer.
+const DEFAULT_CHECK_INTERVAL = 60000
+const CHECK_INTERVAL = parseCheckInterval(process.env.CHECK_INTERVAL)
+// Maximum number of graphemes per posted chunk.
+const BSKY_CHAR_LIMIT = 290
+
+// One segmenter shared by every grapheme count and split.
+const graphemeSegmenter = new Intl.Segmenter()
+
+// True while a checkNewPosts run is in flight, so interval ticks cannot overlap.
+let checkInProgress = false
+
+/** One image entry of an `app.bsky.embed.images` embed. */
+type ImageEmbedItem = {
+  alt: string,
+  image: Awaited<ReturnType<BskyAgent['uploadBlob']>>['data']['blob']
+}
+
+/** The `app.bsky.embed.video` embed built after a successful video upload. */
+type VideoEmbed = {
+  $type: 'app.bsky.embed.video',
+  video: unknown,
+  alt: string
+}
+
+/**
+ * Parses the CHECK_INTERVAL environment value (milliseconds).
+ *
+ * @param raw Raw environment string, possibly undefined or empty.
+ * @returns The parsed value, or DEFAULT_CHECK_INTERVAL when `raw` is missing,
+ *   non-numeric or not positive (setInterval treats NaN/0 as "fire as fast as possible").
+ */
+function parseCheckInterval(raw: string | undefined): number {
+  const parsed = Number.parseInt(raw ?? '', 10)
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_CHECK_INTERVAL
+}
+
+/** Counts the grapheme clusters in `text`. */
+function getGraphemeLength(text: string): number {
+  return [...graphemeSegmenter.segment(text)].length
+}
+
+/**
+ * Splits `text` into trimmed chunks of at most `limit` graphemes.
+ *
+ * When a chunk is full it is cut at its last space if that space sits at an
+ * index greater than `limit - 80`; otherwise it is cut right at the limit.
+ *
+ * @param text Text to split.
+ * @param limit Maximum graphemes per chunk.
+ * @returns `[text]` unchanged when it already fits, else the non-empty chunks.
+ */
+function splitIntoChunks(text: string, limit: number): string[] {
+  if (getGraphemeLength(text) <= limit) return [text]
+
+  const chunks: string[] = []
+  // Whitespace-only pieces are dropped: an empty chunk would become an empty post.
+  const pushChunk = (piece: string) => {
+    const trimmed = piece.trim()
+    if (trimmed) chunks.push(trimmed)
+  }
+
+  let current = ''
+  for (const { segment } of graphemeSegmenter.segment(text)) {
+    if (getGraphemeLength(current + segment) > limit) {
+      const lastSpace = current.lastIndexOf(' ')
+      if (lastSpace > limit - 80 && lastSpace > 0) {
+        pushChunk(current.slice(0, lastSpace))
+        current = current.slice(lastSpace).trim() + segment
+      } else {
+        pushChunk(current)
+        current = segment
+      }
+    } else {
+      current += segment
+    }
+  }
+
+  pushChunk(current)
+  return chunks
+}
+
+/**
+ * Decodes a small fixed set of HTML entities.
+ *
+ * `&amp;` is decoded last so text such as `&amp;lt;` yields `&lt;` instead of
+ * being decoded twice into `<`.
+ */
+function decodeHtmlEntities(text: string): string {
+  return text
+    .replace(/&lt;/g, '<')
+    .replace(/&gt;/g, '>')
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    .replace(/&#x27;/gi, "'")
+    .replace(/&apos;/g, "'")
+    .replace(/&amp;/g, '&')
+}
+
+if (!BSKY_USERNAME || !BSKY_PASSWORD || !TWITTER_USER) {
+  console.error('missing env vars')
+  process.exit(1)
+}
+
+const agent = new BskyAgent({
+  service: 'https://bsky.social'
+})
+
+await agent.login({ identifier: BSKY_USERNAME, password: BSKY_PASSWORD })
+
+/**
+ * Fetches the latest tweets of TWITTER_USER and mirrors the newest one to
+ * Bluesky unless its id is already among the saved posts.
+ *
+ * Every error is logged rather than thrown, so a failed run is simply retried
+ * on the next interval tick.
+ * NOTE(review): only tweets[0] is mirrored per run, which presumes
+ * getUserTweets returns the newest tweet first - confirm against the script.
+ */
+async function checkNewPosts() {
+  // Video compression and processing can outlast CHECK_INTERVAL; without this
+  // guard two overlapping runs would both post the same tweet.
+  if (checkInProgress) return
+  checkInProgress = true
+
+  try {
+    const savedPosts = await io.getPosts()
+
+    console.log(`[${new Date().toISOString()}] Searching for tweets from @${TWITTER_USER}...`)
+    const tweets = await getUserTweets(TWITTER_USER, 5)
+    if (!tweets || tweets.length === 0) {
+      console.log(`[${new Date().toISOString()}] No tweets found`)
+      return
+    }
+
+    const latestTweet = tweets[0]
+
+    // Compare against every saved guid, not only the last one: if the newest
+    // tweet is removed, tweets[0] falls back to an older, already mirrored tweet.
+    if (savedPosts.some(saved => saved.guid === latestTweet.id)) {
+      console.log(`[${new Date().toISOString()}] No new posts`)
+      return
+    }
+
+    console.log(`[${new Date().toISOString()}] Found new tweet: ${latestTweet.text.slice(0, 50)}...`)
+
+    const media = (latestTweet.media ?? []).map(m => ({
+      type: m.type === 'video' ? 'video' as const : 'photo' as const,
+      url: m.url
+    }))
+
+    // Strip t.co short links first, then decode entities and trim.
+    const text = decodeHtmlEntities(latestTweet.text.replace(/https:\/\/t\.co\/\w+/g, '')).trim()
+    const newPost: Post = {
+      description: text,
+      guid: latestTweet.id,
+      media
+    }
+
+    console.log(`[${new Date().toISOString()}] Posting to Bluesky...`)
+    await pushPost(newPost)
+    console.log(`[${new Date().toISOString()}] Successfully posted to Bluesky!`)
+
+    savedPosts.push(newPost)
+    await io.writePosts(savedPosts)
+  } catch (error) {
+    console.error('error checking tweets:', error)
+  } finally {
+    checkInProgress = false
+  }
+}
+
+/**
+ * Downloads a media URL into memory.
+ *
+ * @param url URL of the image (or video) to fetch.
+ * @returns `[bytes, contentType]`, or `null` when the request fails or the
+ *   content type is neither `image/*` nor `video/*`.
+ */
+async function fetchImageAsUint8Array(url: string): Promise<[Uint8Array, string] | null> {
+  try {
+    const response = await axios.get(url, {
+      responseType: 'arraybuffer',
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+      },
+      timeout: 30000
+    })
+    const contentType = response.headers["content-type"]?.toString() || ''
+    if (!contentType.startsWith('image/') && !contentType.startsWith('video/')) {
+      return null
+    }
+    return [new Uint8Array(response.data), contentType]
+  } catch (error) {
+    // Still best-effort (null), but leave a trace instead of failing silently.
+    console.error(`[${new Date().toISOString()}] Failed to download ${url}:`, error)
+    return null
+  }
+}
+
+/**
+ * Compresses the video at `videoUrl`, uploads it to the Bluesky video service
+ * and polls once per second (at most 60 times) for the processed blob.
+ *
+ * @param post Post the video belongs to; its guid names the temp file and
+ *   its description becomes the alt text.
+ * @param videoUrl Source URL handed to compressVideo.
+ * @returns The video embed to attach to the first post.
+ * @throws Error when login, PDS lookup, upload or processing fails.
+ */
+async function uploadVideo(post: Post, videoUrl: string): Promise<VideoEmbed> {
+  const videoPath = `./temp/${post.guid}.mp4`
+  try {
+    await agent.login({ identifier: BSKY_USERNAME, password: BSKY_PASSWORD })
+    const did = agent.session?.did
+    if (!did) throw new Error('No active Bluesky session')
+
+    await compressVideo(videoUrl, videoPath)
+    const videoData = fs.readFileSync(videoPath)
+
+    // Look up the account's PDS in its DID document; the service token is requested for that audience.
+    const didDoc = await fetch(`https://plc.directory/${did}`)
+    if (!didDoc.ok) throw new Error(`DID lookup failed: ${didDoc.status}`)
+    const didData = await didDoc.json() as any
+    const pdsUrl = didData.service?.find((s: any) => s.id === '#atproto_pds')?.serviceEndpoint
+    if (typeof pdsUrl !== 'string') throw new Error('No #atproto_pds service in DID document')
+    const pdsDid = `did:web:${new URL(pdsUrl).hostname}`
+
+    const serviceAuth = await agent.com.atproto.server.getServiceAuth({
+      aud: pdsDid,
+      lxm: 'com.atproto.repo.uploadBlob',
+      exp: Math.floor(Date.now() / 1000) + 60 * 30,
+    })
+    const videoServiceAuth = serviceAuth.data.token
+
+    const uploadUrl = new URL('https://video.bsky.app/xrpc/app.bsky.video.uploadVideo')
+    uploadUrl.searchParams.append('did', did)
+    uploadUrl.searchParams.append('name', `${post.guid}.mp4`)
+    const uploadResponse = await fetch(uploadUrl.toString(), {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${videoServiceAuth}`,
+        'Content-Type': 'video/mp4',
+        'Content-Length': videoData.length.toString()
+      },
+      body: new Uint8Array(videoData)
+    })
+    if (!uploadResponse.ok) {
+      const errorText = await uploadResponse.text()
+      throw new Error(`Video upload failed: ${uploadResponse.status} - ${errorText}`)
+    }
+
+    const jobStatus = await uploadResponse.json() as any
+    let blob = jobStatus.blob
+    const maxAttempts = 60
+    for (let attempts = 0; !blob && attempts < maxAttempts; attempts++) {
+      await new Promise(resolve => setTimeout(resolve, 1000))
+      const statusResponse = await fetch(
+        `https://video.bsky.app/xrpc/app.bsky.video.getJobStatus?jobId=${jobStatus.jobId}`,
+        {
+          headers: {
+            'Authorization': `Bearer ${videoServiceAuth}`
+          }
+        }
+      )
+      // A failed status request only costs one attempt; keep polling.
+      if (!statusResponse.ok) continue
+      const status = await statusResponse.json() as any
+      if (status.jobStatus?.blob) {
+        blob = status.jobStatus.blob
+      }
+      if (status.jobStatus?.state === 'JOB_STATE_FAILED') {
+        throw new Error('Video processing failed')
+      }
+    }
+    if (!blob) {
+      throw new Error('Video processing timed out')
+    }
+
+    return {
+      $type: 'app.bsky.embed.video',
+      video: blob,
+      alt: post.description.slice(0, 1000)
+    }
+  } finally {
+    // Best-effort cleanup on success and failure alike; the file may not exist.
+    try { fs.unlinkSync(videoPath); } catch {}
+  }
+}
+
+/**
+ * Uploads up to four `photo` media items as blobs.
+ *
+ * Items that cannot be downloaded or uploaded are logged and skipped.
+ *
+ * @param media Media entries of the post.
+ * @returns Entries for an `app.bsky.embed.images` embed (possibly empty).
+ */
+async function uploadImages(media: NonNullable<Post['media']>): Promise<ImageEmbedItem[]> {
+  const images: ImageEmbedItem[] = []
+  for (const item of media) {
+    if (images.length >= 4) break
+    if (item.type !== 'photo') continue
+    try {
+      const result = await fetchImageAsUint8Array(item.url)
+      if (!result) continue
+      const [imageArray, encoding] = result
+      const { data } = await agent.uploadBlob(imageArray, { encoding })
+      images.push({
+        alt: '',
+        image: data.blob
+      })
+    } catch (error) {
+      console.error(`[${new Date().toISOString()}] Image upload failed:`, error)
+    }
+  }
+  return images
+}
+
+/**
+ * Publishes `post` to Bluesky as one post or, when the text exceeds
+ * BSKY_CHAR_LIMIT graphemes, as a reply thread.
+ *
+ * Media goes on the first post only: the first video when there is one,
+ * otherwise up to four photos. A failed video upload is logged and the text
+ * is still posted without media.
+ *
+ * @param post Post to publish.
+ */
+async function pushPost(post: Post) {
+  let images: ImageEmbedItem[] = []
+  let videoEmbed: VideoEmbed | null = null
+
+  if (post.media) {
+    const firstVideo = post.media.find(m => m.type === 'video')
+    if (firstVideo) {
+      console.log(`[${new Date().toISOString()}] Processing video...`)
+      try {
+        videoEmbed = await uploadVideo(post, firstVideo.url)
+      } catch (error) {
+        console.error(`[${new Date().toISOString()}] Video upload failed:`, error)
+      }
+    } else {
+      console.log(`[${new Date().toISOString()}] Processing ${post.media.length} image(s)...`)
+      images = await uploadImages(post.media)
+    }
+  }
+
+  const chunks = splitIntoChunks(post.description, BSKY_CHAR_LIMIT)
+  console.log(`[${new Date().toISOString()}] Text length: ${post.description.length}, chunks: ${chunks.length}`)
+
+  let rootRef: { uri: string, cid: string } | null = null
+  let parentRef: { uri: string, cid: string } | null = null
+
+  for (let i = 0; i < chunks.length; i++) {
+    const chunk = chunks[i]
+    const rt = await buildRichText(chunk, agent)
+
+    let embed = undefined
+    if (i === 0) {
+      if (videoEmbed) {
+        embed = videoEmbed
+      } else if (images.length > 0) {
+        embed = {
+          $type: 'app.bsky.embed.images',
+          images: images
+        }
+      }
+    }
+
+    const result = await agent.post({
+      text: rt.text,
+      facets: rt.facets,
+      embed,
+      reply: rootRef && parentRef ? {
+        root: rootRef,
+        parent: parentRef
+      } : undefined,
+      createdAt: new Date().toISOString()
+    })
+
+    if (!rootRef) rootRef = { uri: result.uri, cid: result.cid }
+    parentRef = { uri: result.uri, cid: result.cid }
+  }
+}
+
+setInterval(checkNewPosts, CHECK_INTERVAL)
diff --git a/src/io.ts b/src/io.ts
new file mode 100644
index 0000000..011733c
--- /dev/null
+++ b/src/io.ts
@@ -0,0 +1,45 @@
+import fs from 'fs'
+import type { Post } from './post'
+import { promisify } from 'util'
+import * as dotenv from 'dotenv'
+
+dotenv.config()
+
+const readFile = promisify(fs.readFile)
+const writeFile = promisify(fs.writeFile)
+
+// Path of the JSON file holding the posts that were already mirrored.
+const DATA_FILE = process.env.DATA_FILE || './posts.json'
+
+/**
+ * Loads the saved posts from DATA_FILE.
+ *
+ * @returns The stored posts, or an empty array when the file is missing,
+ *   unreadable or does not hold a JSON array (the latter two are logged).
+ */
+export async function getPosts(): Promise<Post[]> {
+  try {
+    const data = await readFile(DATA_FILE)
+    const posts: unknown = JSON.parse(data.toString())
+    if (!Array.isArray(posts)) {
+      console.error(`${DATA_FILE} does not contain a JSON array, ignoring it`)
+      return []
+    }
+    return posts as Post[]
+  } catch (err) {
+    // A missing file just means nothing has been saved yet; anything else gets a trace.
+    if ((err as NodeJS.ErrnoException | null)?.code !== 'ENOENT') {
+      console.error(`failed to read ${DATA_FILE}:`, err)
+    }
+    return []
+  }
+}
+
+/**
+ * Overwrites DATA_FILE with `posts` serialized as JSON.
+ *
+ * @param posts Complete list of posts to persist.
+ */
+export async function writePosts(posts: Post[]) {
+  await writeFile(DATA_FILE, JSON.stringify(posts))
+}
\ No newline at end of file
diff --git a/src/post.ts b/src/post.ts
new file mode 100644
index 0000000..bdee993
--- /dev/null
+++ b/src/post.ts
@@ -0,0 +1,9 @@
+/** A post to publish on Bluesky, as persisted by io.ts. */
+export interface Post {
+  /** Text of the post. */
+  description: string,
+  /** Identifier of the source item (index.ts stores the tweet id here). */
+  guid: string,
+  /** Optional attachments; index.ts produces the types 'photo' and 'video'. */
+  media?: Array<{ type: string, url: string }>
+}
\ No newline at end of file
diff --git a/src/twitter.ts b/src/twitter.ts
new file mode 100644
index 0000000..112f350
--- /dev/null
+++ b/src/twitter.ts
@@ -0,0 +1,56 @@
+/** One tweet as consumed from the output of get_tweets.py. */
+export interface TweetData {
+  id: string;
+  text: string;
+  media: Array<{
+    type: string;
+    url: string;
+  }>;
+  created_at: string;
+}
+
+/**
+ * Fetches recent tweets of `username` by running `python get_tweets.py`.
+ *
+ * @param username Account name passed to the script.
+ * @param count Number of tweets passed to the script (default 10).
+ * @returns The JSON array printed by the script.
+ * @throws Error when the script exits non-zero, prints invalid JSON, reports
+ *   a truthy `error` field, or prints something other than an array.
+ */
+export async function getUserTweets(username: string, count: number = 10): Promise<TweetData[]> {
+  const proc = Bun.spawn(['python', 'get_tweets.py', username, count.toString()], {
+    stdout: 'pipe',
+    stderr: 'pipe'
+  });
+
+  // Drain both pipes concurrently so a full stderr pipe cannot stall the
+  // child while stdout is still being read.
+  const [stdout, stderr] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text()
+  ]);
+  await proc.exited;
+
+  if (proc.exitCode !== 0) {
+    throw new Error(`Python script exited with code ${proc.exitCode}: ${stderr}`);
+  }
+
+  let result: unknown;
+  try {
+    result = JSON.parse(stdout);
+  } catch (e) {
+    throw new Error(`Failed to parse JSON: ${stdout}`);
+  }
+
+  // Checked outside the try block so the script's own error message is not
+  // re-wrapped as a JSON parse failure.
+  const reported = (result as { error?: unknown } | null)?.error;
+  if (reported) {
+    throw new Error(String(reported));
+  }
+  if (!Array.isArray(result)) {
+    throw new Error(`Unexpected output from get_tweets.py: ${stdout}`);
+  }
+  return result as TweetData[];
+}
diff --git a/src/util.ts b/src/util.ts
new file mode 100644
index 0000000..8ebf450
--- /dev/null
+++ b/src/util.ts
@@ -0,0 +1,124 @@
+import HTMLParser from "node-html-parser"
+import { RichText, AppBskyRichtextFacet, BskyAgent } from "@atproto/api"
+import axios from "axios"
+
+/** Media reference extracted from HTML. */
+type MediaItem = { type: string, url: string }
+
+/**
+ * Makes a URL found in instance HTML absolute.
+ *
+ * Protocol-relative URLs ("//host/x") get an https scheme, root-relative
+ * paths ("/x") are prefixed with `https://${instance}`, anything else is
+ * returned unchanged.
+ */
+function toAbsoluteUrl(src: string, instance: string): string {
+  if (src.startsWith('//')) return `https:${src}`
+  if (src.startsWith('/')) return `https://${instance}${src}`
+  return src
+}
+
+/**
+ * Extracts media URLs and plain text from an HTML description.
+ *
+ * @param description HTML fragment to parse.
+ * @param instance Host name used to absolutize root-relative URLs.
+ * @returns `desc`, the trimmed text with `img`/`video` elements removed, and
+ *   `media`, one `image` entry per `img` src and per video poster (or, when
+ *   there is no poster, the first `source` src).
+ */
+export function parseDescription(description: string, instance: string) {
+  const descElem = HTMLParser.parse(description)
+  const imageElems = descElem.getElementsByTagName('img')
+  const videoElems = descElem.getElementsByTagName('video')
+
+  const media: MediaItem[] = []
+
+  for (const image of imageElems) {
+    const src = image.attributes['src']
+    if (src) media.push({ type: 'image', url: toAbsoluteUrl(src, instance) })
+  }
+
+  for (const video of videoElems) {
+    const poster = video.attributes['poster'] || video.querySelector('source')?.attributes['src']
+    if (poster) media.push({ type: 'image', url: toAbsoluteUrl(poster, instance) })
+  }
+
+  // Remove the media elements before reading the remaining text content.
+  for (const elem of [...imageElems, ...videoElems]) elem.remove()
+
+  const desc = (descElem.textContent || '').trim()
+  return { desc, media }
+}
+
+/**
+ * Builds a RichText for `text` and resolves its facets through `agent`.
+ *
+ * @param text Plain text of one post.
+ * @param agent Agent passed to RichText.detectFacets.
+ * @returns The RichText after facet detection.
+ */
+export async function buildRichText(text: string, agent: BskyAgent) {
+  const rt = new RichText({ text })
+  await rt.detectFacets(agent)
+  return rt
+}
+
+/**
+ * Scrapes the text and media of a tweet page.
+ *
+ * Failures are logged and yield whatever was collected so far (possibly an
+ * empty text and no media).
+ *
+ * @param tweetUrl URL of the page to fetch.
+ * @param instance Host name used to absolutize root-relative media URLs.
+ * @returns `text` from the `.tweet-content` element and `media` entries for
+ *   attachment images and video posters.
+ */
+export async function extractMediaFromTweet(tweetUrl: string, instance: string) {
+  const media: MediaItem[] = []
+  let text = ''
+
+  try {
+    const response = await axios.get(tweetUrl, {
+      headers: {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+      },
+      timeout: 15000
+    })
+
+    const html = response.data
+    const doc = HTMLParser.parse(html)
+
+    const tweetContent = doc.querySelector('.tweet-content')
+    if (tweetContent) {
+      // Replace links with their text, except those whose href contains '/search?q='.
+      for (const link of tweetContent.querySelectorAll('a')) {
+        const href = link.getAttribute('href')
+        if (href && !href.includes('/search?q=')) {
+          link.replaceWith(link.textContent)
+        }
+      }
+      text = tweetContent.textContent.trim()
+    }
+
+    const attachments = doc.querySelectorAll('.attachments .still-image img, .attachments .attachment-image img')
+    for (const img of attachments) {
+      const src = img.getAttribute('src')
+      if (src) media.push({ type: 'image', url: toAbsoluteUrl(src, instance) })
+    }
+
+    for (const vid of doc.querySelectorAll('.attachments video')) {
+      const poster = vid.getAttribute('poster')
+      if (poster) media.push({ type: 'image', url: toAbsoluteUrl(poster, instance) })
+    }
+
+    if (text === '' && media.length === 0) {
+      console.error('no content found on page, html preview:', html.substring(0, 500))
+    }
+  } catch (error) {
+    console.error('failed to fetch tweet page:', error)
+  }
+
+  return { text, media }
+}
\ No newline at end of file