initial working concept

commit 503f2457c9

14 changed files with 4748 additions and 0 deletions
.gitignore (vendored, new file, 25 lines)
@@ -0,0 +1,25 @@
target/
**/target/
dist/
node_modules/

# Environment files
.env
.env.prod
docker.dev/**/.env
**/.env
.env.*local

# Data directories
docker.dev/**/volumes/db/data/*
!docker.dev/**/volumes/.gitkeep
**/kong.yaml

# misc
*.crt
*.pem
.DS_Store

# Local files for common editors
.idea
.vscode

Cargo.lock (generated, new file, 2967 lines)
File diff suppressed because it is too large.

Cargo.toml (new file, 35 lines)
@@ -0,0 +1,35 @@
[package]
name = "twitter-monitor"
version = "0.1.0"
edition = "2021"

[dependencies]
# HTTP client
reqwest = { version = "0.11", features = ["json", "rustls-tls"] }
tokio = { version = "1.0", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"
thiserror = "1.0"
clap = { version = "4.0", features = ["derive"] }
config = "0.13"
log = "0.4"
env_logger = "0.10"

# Web framework
actix-web = { version = "4.0", default-features = false, features = ["rustls"] }
actix-rt = "2.0"
actix-cors = "0.6"
actix-files = "0.6"
actix = "0.13"
actix-web-actors = "4.0"

# Other
futures = "0.3"
regex = "1.0"

# Datetime handling
chrono = "0.4"

# URL utilities
urlencoding = "2.1"

README.md (new file, 204 lines)
@@ -0,0 +1,204 @@
# Twitter Monitor

A Rust-based Twitter monitoring service that streams tweets from your home timeline to a web frontend. Supports both cURL-based authentication and Twitter API authentication.

## Features

- Real-time home timeline monitoring
- WebSocket-based streaming
- Optional response logging to files for debugging (disabled by default)
- Support for both cURL and API authentication
- Clean, responsive web interface
- Configurable polling intervals with countdown timer
- Robust error handling and retries

## How It Works

The application consists of three main components:

1. **Twitter Client**: Handles authentication and API requests to fetch tweets
2. **WebSocket Server**: Streams tweets to connected clients in real-time
3. **Frontend**: Displays tweets and provides a user interface

## Authentication Methods

### cURL-based Authentication (Recommended)

This method captures an authenticated browser session to access Twitter without API keys:

1. **Get cURL Command**:
   ```text
   # Using Chrome:
   1. Go to x.com (Twitter)
   2. Log in to your account
   3. Navigate to your home timeline (click "For you" or "Following")
   4. Press F12 to open DevTools
   5. Go to the Network tab
   6. Find the "HomeLatestTimeline" request
   7. Right-click on the request
   8. Select "Copy" > "Copy as cURL (bash)"
   ```

2. **Save cURL Command**:
   ```bash
   # Create a file in the config directory
   mkdir -p config
   touch config/curl.txt

   # Paste the copied cURL command into curl.txt
   # (see config/curl.example.txt for the expected shape)
   ```

### Twitter API Authentication (Alternative)

For official API access (not yet fully implemented):

1. **Create a Twitter Developer Account** at https://developer.twitter.com/
2. **Get API Keys** from your Twitter Developer Portal
3. **Configure API Keys** in your configuration file

## Installation

1. **Clone the Repository**:
   ```bash
   git clone https://github.com/yourusername/twitter-monitor
   cd twitter-monitor
   ```

2. **Build the Project**:
   ```bash
   cargo build --release
   ```

3. **Configure the Project**:
   Create a configuration file in the `config` directory:

   ```yaml
   # config/session.yaml
   twitter:
     # Path to your curl command file
     curl_file: "config/curl.txt"
     # Or use a bearer token (coming soon)
     # bearer_token: "your_bearer_token"

   monitoring:
     # Polling interval in seconds
     polling_interval: 60
     # Maximum tweets to fetch per request
     max_tweets: 10

   server:
     # WebSocket server host
     host: "127.0.0.1"
     # WebSocket server port
     port: 8080
   ```

## Running the Project

Start the server with:

```bash
# Development mode
cargo run -- --config config/session.yaml

# Production mode
cargo run --release -- --config config/session.yaml
```

The application will:
1. Parse the cURL command to extract authentication details
2. Start monitoring your Twitter home timeline
3. Start a WebSocket server to stream tweets (see the client sketch below)
4. Optionally save Twitter API responses to files for debugging (disabled by default)
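
To sanity-check the stream end to end, a small Rust client can subscribe to the `/ws` endpoint. This is a minimal sketch; it assumes the `tokio-tungstenite` crate, which is **not** one of this project's dependencies:

```rust
use futures::StreamExt;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Connect to the WebSocket endpoint exposed by src/server.rs
    // (host and port come from the `server` section of the config).
    let (mut ws, _resp) =
        tokio_tungstenite::connect_async("ws://127.0.0.1:8080/ws").await?;

    // Each broadcast frame is a JSON-serialized Tweet.
    while let Some(frame) = ws.next().await {
        println!("{}", frame?);
    }
    Ok(())
}
```
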
## Project Architecture

### Core Components

1. **TwitterClient** (`src/twitter/client.rs`)
   - Responsible for Twitter API interactions
   - Handles both cURL-based and API token authentication
   - Implements tweet parsing and monitoring logic
   - Includes robust error handling and retries

2. **CurlAuth** (`src/twitter/curl.rs`)
   - Parses cURL commands to extract authentication data
   - Extracts URL, headers, cookies, and request data
   - Provides logging of parsed components

3. **Server** (`src/server.rs`)
   - Manages WebSocket connections
   - Broadcasts tweets to connected clients
   - Implements client heartbeat mechanisms

4. **Main** (`src/main.rs`)
   - Coordinates application components
   - Loads configuration and initializes services
   - Sets up monitoring and broadcasting

### Technical Challenges Solved

1. **Twitter API Authentication**
   - Implemented parsing of cURL commands to extract session cookies
   - Added required feature flags to match Twitter's GraphQL API requirements
   - Handled authentication errors with clear messaging

2. **Robust Data Handling**
   - Implemented safe UTF-8 text handling to avoid panics
   - Added optional response logging to files for debugging
   - Carefully parsed nested JSON structures

3. **Real-time Communication**
   - Built a WebSocket server with proper connection handling
   - Implemented a heartbeat mechanism to detect disconnected clients
   - Added broadcast functionality with error handling (see the sketch after this section)

4. **User Experience**
   - Added a countdown timer between scans
   - Implemented clean logging for better readability
   - Provided detailed status updates during operation
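
The fan-out in `src/server.rs` is built on `tokio::sync::broadcast`; here is a minimal sketch of the primitive itself (the real server wires each receiver into an actix WebSocket session):

```rust
use tokio::sync::broadcast;

fn main() {
    // One sender shared by the monitor task...
    let (tx, mut rx_a) = broadcast::channel::<String>(100);
    // ...and one receiver per connected WebSocket client.
    let mut rx_b = tx.subscribe();

    // send() reports how many receivers are currently subscribed.
    let delivered = tx.send("{\"text\":\"hello\"}".to_string()).unwrap();
    assert_eq!(delivered, 2);
    assert!(rx_a.try_recv().is_ok() && rx_b.try_recv().is_ok());
}
```
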
## Troubleshooting

### cURL Authentication Issues

1. **Session Expired**:
   - Twitter sessions expire after some time
   - Generate a new cURL command and update your `curl.txt` file
   - Restart the application

2. **400 Bad Request Errors**:
   - Twitter's API requirements change frequently
   - The application automatically adds required feature flags
   - Full responses can be dumped to `twitter_response_*.txt` files by enabling the commented-out `save_response_to_file` calls in `src/twitter/client.rs`

3. **Parse Errors**:
   - Twitter's response format may change
   - Check the saved response files
   - Update the parsing logic if necessary (the defensive navigation style is sketched below)
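
`parse_timeline_tweets` in `src/twitter/client.rs` survives format drift because every step of the descent into the GraphQL response is `Option`-chained. A condensed illustration of the style:

```rust
use serde_json::json;

fn main() {
    let resp = json!({
        "data": { "home": { "home_timeline_urt": { "instructions": [] } } }
    });

    // A missing key at any level short-circuits to None instead of panicking.
    let instructions = resp
        .get("data")
        .and_then(|d| d.get("home"))
        .and_then(|h| h.get("home_timeline_urt"))
        .and_then(|t| t.get("instructions"))
        .and_then(|i| i.as_array());

    assert!(instructions.is_some());
}
```
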
## Configuration Options

```yaml
twitter:
  # Path to your curl command file
  curl_file: "config/curl.txt"
  # Or use a bearer token (coming soon)
  # bearer_token: "your_bearer_token"

monitoring:
  # Polling interval in seconds
  polling_interval: 60
  # Maximum tweets to fetch per request
  max_tweets: 10

server:
  # WebSocket server host
  host: "127.0.0.1"
  # WebSocket server port
  port: 8080
```

## License

This project is licensed under the MIT License - see the LICENSE file for details.

config/api.yaml (new file, 39 lines)
@@ -0,0 +1,39 @@
# Twitter API Configuration (API-based)
twitter:
  # Base URL for Twitter API
  base_url: "https://api.twitter.com/2"

  # API-based authentication using bearer token
  bearer_token: "AAAAAAAAAAAAAAAAAAAAAI5P0QEAAAAAVe2ijbvQuIj0Bsfv%2F98u9zYnHbk%3DJuKMSUNSy3ae71QWL58fS1S1KUcJVr3Om5hJjVF1BqJqzWZQLL"

  # API endpoints
  endpoints:
    search: "/tweets/search/recent"
    user: "/users"
    timeline: "/timeline/home"

# Monitoring Configuration
monitoring:
  # List of Twitter usernames to monitor
  usernames:
    - "elonmusk"
    - "jack"

  # Polling interval in seconds
  polling_interval: 300 # 5 minutes

  # Maximum number of tweets to fetch per request
  max_tweets: 10

# Database Configuration
database:
  # SQLite database file path
  path: "tweets.db"

# Logging Configuration
logging:
  # Log level (debug, info, warn, error)
  level: "info"

  # Log file path
  file: "twitter_monitor.log"

config/curl.example.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
curl '' \
-H 'x-twitter-auth-type: OAuth2Session' \
-H 'x-twitter-client-language: en'
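
Within the crate, a captured command is consumed the same way `src/main.rs` does it. A sketch of reading the `config/curl.txt` path referenced by `config/session.yaml` and handing it to the parser:

```rust
use crate::twitter::curl::CurlAuth;

fn load_auth() -> anyhow::Result<CurlAuth> {
    // Read the command captured from the browser's Network tab...
    let curl_command = std::fs::read_to_string("config/curl.txt")?;
    // ...and extract URL, headers, cookies, and request body from it.
    CurlAuth::from_curl(&curl_command)
}
```
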
config/default.yaml (new file, 46 lines)
@@ -0,0 +1,46 @@
# Twitter API Configuration
twitter:
  # Base URL for Twitter API
  base_url: "https://api.twitter.com/2"

  # Authentication Method (choose one):
  # 1. Session-based authentication using cURL command
  # 2. API-based authentication using bearer token

  # For session-based authentication:
  # curl_command: "curl 'https://twitter.com/i/api/2/timeline/home.json' -H 'authorization: Bearer ...' -H 'cookie: ...'"

  # For API-based authentication:
  bearer_token: "AAAAAAAAAAAAAAAAAAAAAI5P0QEAAAAAVe2ijbvQuIj0Bsfv%2F98u9zYnHbk%3DJuKMSUNSy3ae71QWL58fS1S1KUcJVr3Om5hJjVF1BqJqzWZQLL"

  # API endpoints
  endpoints:
    search: "/tweets/search/recent"
    user: "/users"
    timeline: "/timeline/home"

# Monitoring Configuration
monitoring:
  # List of Twitter usernames to monitor
  usernames:
    - "elonmusk"
    - "jack"

  # Polling interval in seconds
  polling_interval: 300 # 5 minutes

  # Maximum number of tweets to fetch per request
  max_tweets: 10

# Database Configuration
database:
  # SQLite database file path
  path: "tweets.db"

# Logging Configuration
logging:
  # Log level (debug, info, warn, error)
  level: "info"

  # Log file path
  file: "twitter_monitor.log"

config/session.yaml (new file, 16 lines)
@@ -0,0 +1,16 @@
# Twitter API Configuration (Session-based)
twitter:
  curl_file: "./config/curl.txt"

# Monitoring Configuration
monitoring:
  # Polling interval in seconds
  polling_interval: 180

  # Maximum number of tweets to fetch per request
  max_tweets: 20

# Server Configuration
server:
  host: "127.0.0.1"
  port: 8080
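
These keys line up with the `Settings` structs in `src/main.rs`. For reference, a minimal sketch of the same load path using the `config` crate pinned in Cargo.toml (the struct and field names here are illustrative):

```rust
use config::Config;
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Monitoring {
    polling_interval: u64,
    max_tweets: u32,
}

fn main() -> anyhow::Result<()> {
    let cfg = Config::builder()
        .add_source(config::File::with_name("config/session"))
        .build()?;
    // Deserialize one section; main.rs deserializes the whole file at once.
    let monitoring: Monitoring = cfg.get("monitoring")?;
    println!("polling every {}s, up to {} tweets",
        monitoring.polling_interval, monitoring.max_tweets);
    Ok(())
}
```
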
src/config.rs (new file, 66 lines)
@@ -0,0 +1,66 @@
// NOTE: This module is not yet wired in; src/main.rs declares only `mod twitter;`
// and `mod server;`, and the `serde_yaml` crate used below is not listed in
// Cargo.toml. It sketches the config schema for the API-based mode.
use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs;
use std::path::Path;

#[derive(Debug, Deserialize)]
pub struct Config {
    pub twitter: TwitterConfig,
    pub monitoring: MonitoringConfig,
    pub database: DatabaseConfig,
    pub logging: LoggingConfig,
}

#[derive(Debug, Deserialize)]
pub struct TwitterConfig {
    pub base_url: String,
    #[serde(default)]
    pub curl_command: Option<String>,
    #[serde(default)]
    pub bearer_token: Option<String>,
    pub endpoints: EndpointConfig,
}

#[derive(Debug, Deserialize)]
pub struct EndpointConfig {
    pub search: String,
    pub user: String,
    pub timeline: String,
}

#[derive(Debug, Deserialize)]
pub struct MonitoringConfig {
    pub usernames: Vec<String>,
    pub polling_interval: u64,
    pub max_tweets: u32,
}

#[derive(Debug, Deserialize)]
pub struct DatabaseConfig {
    pub path: String,
}

#[derive(Debug, Deserialize)]
pub struct LoggingConfig {
    pub level: String,
    pub file: String,
}

impl Config {
    pub fn load(path: &Path) -> Result<Self> {
        let content = fs::read_to_string(path)
            .with_context(|| format!("Failed to read config file: {}", path.display()))?;

        let config: Config = serde_yaml::from_str(&content)
            .with_context(|| format!("Failed to parse config file: {}", path.display()))?;

        // Validate authentication configuration
        if config.twitter.curl_command.is_none() && config.twitter.bearer_token.is_none() {
            return Err(anyhow::anyhow!(
                "No authentication method configured. Please provide either curl_command or bearer_token"
            ));
        }

        Ok(config)
    }
}

src/main.rs (new file, 135 lines)
@@ -0,0 +1,135 @@
mod twitter;
mod server;

use anyhow::{anyhow, Result};
use clap::Parser;
use config::Config;
use std::fs;
use std::path::PathBuf;
use std::time::Duration;
use twitter::client::TwitterClient;
use twitter::curl::CurlAuth;
use serde::Deserialize;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to the configuration file
    #[arg(short, long, value_name = "FILE", default_value = "config/session.yaml")]
    config: String,

    /// Authentication method to use (session or api)
    // NOTE: currently unused; the method is inferred from which keys
    // (curl_file or bearer_token) appear in the config file.
    #[arg(short, long, value_name = "METHOD", default_value = "session")]
    auth_method: String,
}

#[derive(Clone, Deserialize, Debug)]
struct Settings {
    twitter: TwitterSettings,
    server: ServerSettings,
    monitoring: MonitoringSettings,
}

#[derive(Clone, Deserialize, Debug)]
struct TwitterSettings {
    curl_file: Option<String>,
    bearer_token: Option<String>,
}

#[derive(Clone, Deserialize, Debug)]
struct ServerSettings {
    host: String,
    port: u16,
}

#[derive(Clone, Deserialize, Debug)]
struct MonitoringSettings {
    polling_interval: u64,
    max_tweets: u32,
}

#[actix_rt::main]
async fn main() -> Result<()> {
    let args = Args::parse();

    println!("Loading configuration from: {}", &args.config);

    // Load configuration
    let config_builder = Config::builder()
        .add_source(config::File::from(PathBuf::from(&args.config)));

    let config_result = config_builder.build();
    if let Err(e) = &config_result {
        println!("Error loading config: {}", e);
        return Err(anyhow!("Failed to load configuration: {}", e));
    }

    let config = config_result?;
    let settings_result = config.try_deserialize::<Settings>();
    if let Err(e) = &settings_result {
        println!("Error deserializing config: {}", e);
        return Err(anyhow!("Failed to deserialize configuration: {}", e));
    }

    let settings = settings_result?;
    println!("Configuration loaded successfully");

    // Create the WebSocket server
    let server = server::Server::new();

    // Initialize the client from whichever auth method the config provides
    let client = if let Some(curl_file_path) = settings.twitter.curl_file.clone() {
        println!("Using curl file: {}", curl_file_path);
        let curl_file_result = fs::read_to_string(&curl_file_path);
        if let Err(e) = &curl_file_result {
            println!("Error reading curl file: {}", e);
            return Err(anyhow!("Failed to read curl file: {}", e));
        }

        let curl_command = curl_file_result?;
        let curl_auth_result = CurlAuth::from_curl(&curl_command);
        if let Err(e) = &curl_auth_result {
            println!("Error parsing curl command: {}", e);
            return Err(anyhow!("Failed to parse curl command: {}", e));
        }

        let curl_auth = curl_auth_result?;
        TwitterClient::new(Some(curl_auth), None)
    } else if let Some(token) = settings.twitter.bearer_token.clone() {
        println!("Using bearer token authentication");
        TwitterClient::new(None, Some(token))
    } else {
        return Err(anyhow!("No authentication method specified in configuration"));
    };

    // Start monitoring the home timeline
    println!("Starting to monitor home timeline");

    let client_clone = client.clone();
    let server_clone = server.clone();
    let polling_interval = settings.monitoring.polling_interval;
    let max_tweets = settings.monitoring.max_tweets;

    tokio::spawn(async move {
        if let Err(e) = client_clone.monitor_home_timeline(
            max_tweets,
            |tweets| {
                for tweet in tweets {
                    let tweet_json = serde_json::to_string(&tweet)?;
                    server_clone.broadcast_tweet(&tweet_json)?;
                    println!("New tweet from @{}: {}", tweet.author_username.as_deref().unwrap_or("unknown"), tweet.text);
                }
                Ok(())
            },
            Duration::from_secs(polling_interval),
        ).await {
            eprintln!("Error monitoring home timeline: {}", e);
        }
    });

    // Start the server (this is a blocking call)
    println!("Starting server on {}:{}", settings.server.host, settings.server.port);
    server.start(&settings.server.host, settings.server.port).await?;

    Ok(())
}

src/server.rs (new file, 129 lines)
@@ -0,0 +1,129 @@
use actix_cors::Cors;
use actix_files::Files;
use actix_web::{web, App, HttpRequest, HttpResponse, HttpServer};
use actix_web_actors::ws;
use actix_web_actors::ws::WebsocketContext;
use anyhow::Result;
use tokio::sync::broadcast;
use actix::Actor;
use actix::ActorContext;
use actix::AsyncContext;
use actix::StreamHandler;
use actix::{Handler, Message};
use std::time::{Duration, Instant};

const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(5);
const CLIENT_TIMEOUT: Duration = Duration::from_secs(10);

#[derive(Clone)]
pub struct Server {
    pub tx: broadcast::Sender<String>,
}

impl Server {
    pub fn new() -> Self {
        let (tx, _) = broadcast::channel(100);
        Self { tx }
    }

    pub async fn start(&self, host: &str, port: u16) -> std::io::Result<()> {
        let tx = self.tx.clone();

        let server = HttpServer::new(move || {
            let tx = tx.clone();

            let cors = Cors::default()
                .allow_any_origin()
                .allow_any_method()
                .allow_any_header();

            App::new()
                .wrap(cors)
                .app_data(web::Data::new(tx.clone()))
                .route("/ws", web::get().to(ws_index))
                .service(Files::new("/static", "./static"))
        })
        .bind((host, port))?;

        server.run().await
    }

    pub fn broadcast_tweet(&self, tweet: &str) -> Result<(), broadcast::error::SendError<String>> {
        match self.tx.send(tweet.to_string()) {
            Ok(num_receivers) => {
                if num_receivers > 0 {
                    println!("Tweet broadcast to {} active WebSocket client(s)", num_receivers);
                }
                Ok(())
            }
            Err(_) => {
                // send() only fails when no receiver is subscribed; that is
                // normal when no client is connected, so don't treat it as fatal.
                Ok(())
            }
        }
    }
}

/// Internal actor message used to push a broadcast tweet into a session.
#[derive(Message)]
#[rtype(result = "()")]
struct Broadcast(String);

async fn ws_index(
    req: HttpRequest,
    stream: web::Payload,
    tx: web::Data<broadcast::Sender<String>>,
) -> Result<HttpResponse, actix_web::Error> {
    ws::start(
        WebSocketSession {
            tx: tx.get_ref().clone(),
            hb: Instant::now(),
        },
        &req,
        stream,
    )
}

pub struct WebSocketSession {
    tx: broadcast::Sender<String>,
    hb: Instant,
}

impl WebSocketSession {
    fn hb(&self, ctx: &mut WebsocketContext<Self>) {
        ctx.run_interval(HEARTBEAT_INTERVAL, |act, ctx| {
            if Instant::now().duration_since(act.hb) > CLIENT_TIMEOUT {
                println!("WebSocket Client heartbeat failed, disconnecting!");
                ctx.stop();
                return;
            }
            ctx.ping(b"");
        });
    }
}

impl Actor for WebSocketSession {
    type Context = WebsocketContext<Self>;

    fn started(&mut self, ctx: &mut Self::Context) {
        self.hb(ctx);

        // Subscribe to the broadcast channel and forward every tweet to this
        // client; without this, broadcast_tweet() never reaches the browser.
        let mut rx = self.tx.subscribe();
        let addr = ctx.address();
        actix::spawn(async move {
            while let Ok(text) = rx.recv().await {
                if addr.try_send(Broadcast(text)).is_err() {
                    break; // the session has stopped
                }
            }
        });
    }
}

impl Handler<Broadcast> for WebSocketSession {
    type Result = ();

    fn handle(&mut self, msg: Broadcast, ctx: &mut Self::Context) {
        ctx.text(msg.0);
    }
}

impl StreamHandler<Result<ws::Message, ws::ProtocolError>> for WebSocketSession {
    fn handle(&mut self, msg: Result<ws::Message, ws::ProtocolError>, ctx: &mut Self::Context) {
        match msg {
            Ok(ws::Message::Ping(msg)) => {
                self.hb = Instant::now();
                ctx.pong(&msg);
            }
            Ok(ws::Message::Pong(_)) => {
                self.hb = Instant::now();
            }
            Ok(ws::Message::Text(text)) => {
                // Re-broadcast client text to all subscribers.
                if let Err(e) = self.tx.send(text.to_string()) {
                    println!("Error sending message: {}", e);
                }
            }
            Ok(ws::Message::Binary(bin)) => ctx.binary(bin),
            Ok(ws::Message::Close(reason)) => {
                ctx.close(reason);
                ctx.stop();
            }
            _ => ctx.stop(),
        }
    }
}

src/twitter/client.rs (new file, 897 lines)
@@ -0,0 +1,897 @@
use anyhow::{anyhow, Result};
use reqwest::header;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tokio::time;
use crate::twitter::curl::CurlAuth;
use thiserror::Error;
use std::fs::File;
use std::io::Write;
use std::path::Path;

#[derive(Debug, Error)]
pub enum TwitterError {
    #[error("Failed to parse tweet data: {0}")]
    ParseError(String),
    #[error("API request failed: {0}")]
    RequestError(#[from] reqwest::Error),
    #[error("Authentication failed: {0}")]
    AuthError(String),
    #[error("Rate limit exceeded")]
    RateLimitExceeded,
    #[error("No authentication method configured")]
    NoAuthMethod,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tweet {
    pub id: String,
    pub text: String,
    pub author_id: String,
    pub author_username: Option<String>,
    pub author_name: Option<String>,
    pub created_at: String,
    pub entities: Option<TweetEntities>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TweetEntities {
    pub hashtags: Option<Vec<Hashtag>>,
    pub mentions: Option<Vec<Mention>>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hashtag {
    pub tag: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Mention {
    pub username: String,
    pub id: String,
}

#[derive(Clone)]
pub struct TwitterClient {
    client: reqwest::Client,
    curl_auth: Option<CurlAuth>,
    bearer_token: Option<String>,
    home_timeline_url: String,
}

impl TwitterClient {
    pub fn new(
        curl_auth: Option<CurlAuth>,
        bearer_token: Option<String>,
    ) -> Self {
        let client_builder = reqwest::Client::builder()
            .timeout(Duration::from_secs(30))
            .redirect(reqwest::redirect::Policy::none())
            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");

        // Build the client with default headers matching the auth method.
        let client = match &curl_auth {
            Some(auth) => client_builder
                .default_headers(auth.headers.clone())
                .build()
                .unwrap(),
            None if bearer_token.is_some() => {
                let mut headers = header::HeaderMap::new();
                if let Some(token) = &bearer_token {
                    if let Ok(auth_value) = format!("Bearer {}", token).parse() {
                        headers.insert(header::AUTHORIZATION, auth_value);
                    }
                }
                client_builder.default_headers(headers).build().unwrap()
            }
            _ => client_builder.build().unwrap(),
        };

        // Process the URL from the curl command. For GraphQL endpoints the
        // query parameters are stripped here; they travel in the POST body
        // instead (CurlAuth::from_curl already materializes `data` from the
        // URL parameters when the captured command has no body).
        let home_timeline_url = if let Some(ref auth) = curl_auth {
            let url = &auth.url;

            if url.contains("/graphql/") && url.contains('?') {
                match url.split('?').next() {
                    Some(base_url) => {
                        if parse_twitter_url_params(url).is_some() {
                            println!("Extracted URL parameters - will use in POST request.");
                            println!("Using base URL: {}", base_url);
                        }
                        base_url.to_string()
                    }
                    None => url.clone(),
                }
            } else {
                url.clone()
            }
        } else {
            "https://x.com/i/api/graphql/nMyTQqsJiUGBKLGNSQamAA/HomeLatestTimeline".to_string()
        };

        Self {
            client,
            curl_auth,
            bearer_token,
            home_timeline_url,
        }
    }

    pub async fn get_home_timeline(&self, max_results: u32) -> Result<Vec<Tweet>> {
        println!("Making request to URL: {}", self.home_timeline_url);

        let auth = match &self.curl_auth {
            Some(auth) => auth,
            None if self.bearer_token.is_some() => {
                // Bearer-token auth is not wired up yet (see README).
                return Err(anyhow!("Bearer token auth not implemented yet"));
            }
            None => return Err(TwitterError::NoAuthMethod.into()),
        };

        let url = &auth.url;
        println!("Using URL from curl command: {}", url);

        // Extract cursor information if present in the captured URL.
        let cursor = extract_cursor_from_url(url);
        if let Some(ref cursor_val) = cursor {
            println!("Found cursor in URL: {}", cursor_val);
        }

        // The base URL without query parameters.
        let base_url = url.split('?').next().unwrap_or(url);

        let response = if url.contains("graphql") && auth.data.is_some() {
            // Twitter's GraphQL endpoints prefer POST for complex queries;
            // send the query in the body against the base URL.
            println!("Using POST request for GraphQL API");
            let body = build_request_body(auth.data.as_deref(), max_results, cursor.as_deref());
            self.client.post(base_url).body(body).send().await?
        } else if url.contains("variables=") || url.contains("features=") {
            // The captured URL already carries its query; use GET, appending
            // the required feature flags if they are missing.
            println!("URL contains query parameters, using GET request");
            let request_url = ensure_features_in_url(url);
            println!("Making GET request to: {}", request_url);
            self.client.get(&request_url).send().await?
        } else {
            // No query parameters: POST the (fixed-up) body to the full URL.
            let body = build_request_body(auth.data.as_deref(), max_results, None);
            self.client.post(url).body(body).send().await?
        };

        self.handle_timeline_response(response, max_results).await
    }

    /// Shared status checks and parsing for all three request paths above.
    async fn handle_timeline_response(
        &self,
        response: reqwest::Response,
        max_results: u32,
    ) -> Result<Vec<Tweet>> {
        println!("Response status: {}", response.status());

        if response.status() == reqwest::StatusCode::TOO_MANY_REQUESTS {
            return Err(TwitterError::RateLimitExceeded.into());
        }

        if response.status() == reqwest::StatusCode::UNAUTHORIZED {
            return Err(TwitterError::AuthError("Authentication failed".to_string()).into());
        }

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await?;
            println!("Error response body: {}", error_text);
            return Err(anyhow!("Request failed with status: {}", status));
        }

        let response_body = response.text().await?;

        // Uncomment to dump raw responses to files for debugging:
        // let filename = format!("twitter_response_{}.txt", chrono::Utc::now().timestamp());
        // if let Err(e) = save_response_to_file(&response_body, &filename) {
        //     println!("Error saving response to file: {}", e);
        // }

        let timeline_data: serde_json::Value = serde_json::from_str(&response_body)?;
        let tweets = self.parse_timeline_tweets(timeline_data)?;

        // Limit the number of tweets to max_results.
        let limited_tweets = if tweets.len() > max_results as usize {
            tweets[0..max_results as usize].to_vec()
        } else {
            tweets
        };

        Ok(limited_tweets)
    }

    fn parse_timeline_tweets(&self, data: serde_json::Value) -> Result<Vec<Tweet>> {
        let mut tweets = Vec::new();

        println!("Parsing tweets from response data...");

        // Check the top-level structure.
        if data.get("data").is_none() {
            println!("Warning: Data field missing in response");
            println!("Response keys: {:?}", data.as_object().map(|o| o.keys().collect::<Vec<_>>()));
            return Ok(vec![]);
        }

        // Navigate to the timeline data.
        let timeline_data = data.get("data")
            .and_then(|d| d.get("home"))
            .and_then(|h| h.get("home_timeline_urt"));

        if timeline_data.is_none() {
            println!("Warning: Could not find home_timeline_urt in response");
            if let Some(data_obj) = data.get("data") {
                println!("Data keys: {:?}", data_obj.as_object().map(|o| o.keys().collect::<Vec<_>>()));
                if let Some(home) = data_obj.get("home") {
                    println!("Home keys: {:?}", home.as_object().map(|o| o.keys().collect::<Vec<_>>()));
                }
            }
            return Ok(vec![]);
        }

        // Get the instructions array.
        let instructions = timeline_data.and_then(|t| t.get("instructions")).and_then(|i| i.as_array());
        if instructions.is_none() {
            println!("Warning: No instructions found in timeline data");
            return Ok(vec![]);
        }

        let instructions = instructions.unwrap();
        println!("Found {} instruction(s)", instructions.len());

        // Track whether we found any actual tweets (vs. only cursors).
        let mut found_tweets = false;
        let mut found_cursors = false;

        for (i, instruction) in instructions.iter().enumerate() {
            println!("Processing instruction {} type: {}", i,
                instruction.get("type").and_then(|t| t.as_str()).unwrap_or("unknown"));

            if let Some(entries) = instruction.get("entries").and_then(|e| e.as_array()) {
                println!("Found {} entries in instruction {}", entries.len(), i);

                // Check whether the response contains only cursor entries.
                let mut cursor_count = 0;

                for entry in entries.iter() {
                    if let Some(content) = entry.get("content") {
                        if let Some(entry_type) = content.get("entryType").and_then(|t| t.as_str()) {
                            if entry_type == "TimelineTimelineCursor" {
                                cursor_count += 1;
                                found_cursors = true;

                                // Print cursor details for debugging.
                                if let Some(cursor_type) = content.get("cursorType").and_then(|t| t.as_str()) {
                                    println!("  Found cursor: {} - value: {}", cursor_type,
                                        content.get("value").and_then(|v| v.as_str()).unwrap_or("unknown"));
                                }
                            }
                        }
                    }
                }

                // If all entries are cursors, we may need to fetch with the cursor value.
                if cursor_count == entries.len() && cursor_count > 0 {
                    println!("WARNING: Response only contains cursor entries, no tweets! You may need to update your curl command.");
                }

                // Continue with normal processing.
                for (j, entry) in entries.iter().enumerate() {
                    if let Some(content) = entry.get("content").and_then(|c| c.as_object()) {
                        // Try to find the tweet data.
                        let tweet_data = content.get("itemContent")
                            .and_then(|ic| ic.get("tweet_results"))
                            .and_then(|tr| tr.get("result"));

                        let tweet = match tweet_data {
                            Some(tweet) => tweet,
                            // Skip this entry if it's not a tweet.
                            None => continue,
                        };
                        found_tweets = true;

                        // Get the user info.
                        let user = tweet.get("core")
                            .and_then(|c| c.get("user_results"))
                            .and_then(|ur| ur.get("result"));

                        if user.is_none() {
                            println!("Warning: User data missing for tweet in entry {}", j);
                            continue;
                        }

                        let author_username = user
                            .and_then(|u| u.get("legacy"))
                            .and_then(|l| l.get("screen_name"))
                            .and_then(|n| n.as_str())
                            .map(|s| s.to_string());

                        let author_name = user
                            .and_then(|u| u.get("legacy"))
                            .and_then(|l| l.get("name"))
                            .and_then(|n| n.as_str())
                            .map(|s| s.to_string());

                        if let (Some(id), Some(created_at)) = (
                            tweet.get("rest_id").and_then(|i| i.as_str()),
                            tweet.get("legacy").and_then(|l| l.get("created_at")).and_then(|c| c.as_str()),
                        ) {
                            // First try legacy.full_text...
                            let text = tweet.get("legacy")
                                .and_then(|l| l.get("full_text"))
                                .and_then(|t| t.as_str())
                                .or_else(|| {
                                    // ...then note_tweet.text if it exists (for longer tweets).
                                    tweet.get("note_tweet")
                                        .and_then(|nt| nt.get("note_tweet_results"))
                                        .and_then(|ntr| ntr.get("result"))
                                        .and_then(|r| r.get("text"))
                                        .and_then(|t| t.as_str())
                                })
                                .unwrap_or("Tweet text not found");

                            // Get author_id from user results.
                            let author_id = tweet.get("core")
                                .and_then(|c| c.get("user_results"))
                                .and_then(|ur| ur.get("result"))
                                .and_then(|r| r.get("rest_id"))
                                .and_then(|ri| ri.as_str())
                                .unwrap_or("unknown");

                            println!("Found tweet by @{}: {}",
                                author_username.as_deref().unwrap_or("unknown"),
                                safe_truncate(text, 100));

                            let tweet = Tweet {
                                id: id.to_string(),
                                text: text.to_string(),
                                author_id: author_id.to_string(),
                                author_username,
                                author_name,
                                created_at: created_at.to_string(),
                                entities: None,
                            };
                            tweets.push(tweet);
                        } else {
                            println!("Warning: Required tweet data missing for entry {}", j);
                        }
                    }
                }
            } else {
                println!("Warning: No entries found in instruction {}", i);
            }
        }

        // Special message if we got cursor data but no tweets.
        if found_cursors && !found_tweets {
            println!("\n===============================================================");
            println!("IMPORTANT: Twitter returned cursor data but no actual tweets!");
            println!("This usually happens when:");
            println!("1. Your curl command is expired or missing essential cookies");
            println!("2. You need to include a valid cursor value in your request");
            println!("3. Twitter has changed their API response format");
            println!("\nTry getting a fresh curl command from your browser's Network tab");
            println!("===============================================================\n");
        }

        println!("Successfully parsed {} tweets", tweets.len());
        Ok(tweets)
    }

    pub async fn monitor_home_timeline(
        &self,
        max_tweets: u32,
        callback: impl Fn(Vec<Tweet>) -> Result<()>,
        interval: Duration,
    ) -> Result<()> {
        let mut last_tweet_id = None;
        let mut retry_count = 0;
        const MAX_RETRIES: u32 = 3;

        println!("Starting home timeline monitoring...");

        loop {
            match self.get_home_timeline(max_tweets).await {
                Ok(tweets) => {
                    retry_count = 0;

                    if !tweets.is_empty() {
                        println!("Received {} tweets", tweets.len());

                        if let Some(ref last_id) = last_tweet_id {
                            let new_tweets: Vec<Tweet> = tweets
                                .iter()
                                .take_while(|t| t.id != *last_id)
                                .cloned()
                                .collect();

                            if !new_tweets.is_empty() {
                                println!("Found {} new tweets", new_tweets.len());

                                // Print the full text of each new tweet.
                                for tweet in &new_tweets {
                                    println!("\n==== Tweet from @{} ====", tweet.author_username.as_deref().unwrap_or("unknown"));
                                    println!("{}", tweet.text);
                                    println!("====================\n");
                                }

                                if let Err(e) = callback(new_tweets) {
                                    // Log the error but continue monitoring.
                                    println!("Error in callback: {}. Continuing monitoring...", e);
                                }
                            } else {
                                println!("No new tweets");
                            }
                        } else {
                            // First run: process all tweets, printing a few.
                            for tweet in tweets.iter().take(3) {
                                println!("\n==== Tweet from @{} ====", tweet.author_username.as_deref().unwrap_or("unknown"));
                                println!("{}", tweet.text);
                                println!("====================\n");
                            }

                            if let Err(e) = callback(tweets.clone()) {
                                // Log the error but continue monitoring.
                                println!("Error in callback: {}. Continuing monitoring...", e);
                            }
                        }

                        last_tweet_id = Some(tweets[0].id.clone());
                    } else {
                        println!("No tweets found in timeline");
                    }
                }
                Err(e) => {
                    println!("Error retrieving timeline: {}", e);
                    retry_count += 1;
                    if retry_count >= MAX_RETRIES {
                        return Err(e);
                    }
                    // Exponential backoff: 1s, 2s, 4s for retries 1..=3.
                    let backoff = Duration::from_secs(2u64.pow(retry_count - 1));
                    println!("Retrying in {} seconds", backoff.as_secs());
                    time::sleep(backoff).await;
                    continue;
                }
            }

            println!("Sleeping for {} seconds", interval.as_secs());

            // Countdown timer between scans.
            let start = std::time::Instant::now();
            let total_secs = interval.as_secs();

            loop {
                let elapsed = start.elapsed().as_secs();
                if elapsed >= total_secs {
                    break;
                }

                let remaining = total_secs - elapsed;
                // Only print every 5 seconds to reduce console spam.
                if remaining % 5 == 0 || remaining <= 3 {
                    print!("\rNext scan in {} seconds...", remaining);
                    let _ = std::io::Write::flush(&mut std::io::stdout());
                }

                time::sleep(Duration::from_secs(1)).await;
            }

            // Clear the line and prepare for new output.
            println!("\rScanning now...            ");
        }
    }

    // Kept for backward compatibility.
    pub async fn get_user_tweets(&self, username: &str, max_results: u32) -> Result<Vec<Tweet>> {
        // For now, just redirect to the home timeline.
        println!("Note: Ignoring username '{}', fetching home timeline instead", username);
        self.get_home_timeline(max_results).await
    }

    // Kept for backward compatibility.
    pub async fn monitor_user(
        &self,
        username: &str,
        callback: impl Fn(Vec<Tweet>) -> Result<()>,
        interval: Duration,
    ) -> Result<()> {
        // Just redirect to the home timeline.
        println!("Note: Ignoring username '{}', monitoring home timeline instead", username);
        self.monitor_home_timeline(10, callback, interval).await
    }
}

/// Ensures a GraphQL request body carries the feature flags and variables that
/// Twitter's API requires, adding defaults (and an optional cursor) when the
/// captured command did not include them.
fn build_request_body(raw: Option<&str>, max_results: u32, cursor: Option<&str>) -> String {
    let body = raw.unwrap_or("{}");
    println!("Request body: {}", body);

    let mut json_body: serde_json::Value =
        serde_json::from_str(body).unwrap_or(serde_json::json!({}));

    // Add the required feature flags if not already present.
    if json_body.get("features").is_none() {
        println!("Adding missing features to request body");
        json_body["features"] = default_features();
    }

    // Add default variables if not already present.
    if json_body.get("variables").is_none() {
        println!("Adding missing variables to request body");
        json_body["variables"] = serde_json::json!({
            "count": max_results,
            "includePromotedContent": false,
            "latestControlAvailable": true,
            "requestContext": "launch"
        });

        // If we have a cursor, add it to the variables.
        if let Some(cursor_val) = cursor {
            json_body["variables"]["cursor"] = serde_json::Value::String(cursor_val.to_string());
        }
    }

    let fixed_body = serde_json::to_string(&json_body).unwrap_or_else(|_| body.to_string());
    println!("Using request body: {}", fixed_body);
    fixed_body
}

/// The feature flags Twitter's GraphQL timeline endpoint currently expects.
fn default_features() -> serde_json::Value {
    serde_json::json!({
        "verified_phone_label_enabled": true,
        "responsive_web_graphql_exclude_directive_enabled": true,
        "responsive_web_graphql_timeline_navigation_enabled": true,
        "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
        "creator_subscriptions_tweet_preview_api_enabled": true,
        "responsive_web_enhance_cards_enabled": false,
        "view_counts_everywhere_api_enabled": true,
        "longform_notetweets_consumption_enabled": true,
        "responsive_web_twitter_article_tweet_consumption_enabled": false,
        "responsive_web_grok_share_attachment_enabled": false,
        "responsive_web_jetfuel_frame": false,
        "responsive_web_grok_image_annotation_enabled": true,
        "articles_preview_enabled": true,
        "responsive_web_grok_analyze_button_fetch_trends_enabled": true,
        "premium_content_api_read_enabled": true,
        "longform_notetweets_rich_text_read_enabled": true,
        "profile_label_improvements_pcf_label_in_post_enabled": true,
        "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
        "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
        "rweb_video_screen_enabled": true,
        "responsive_web_grok_show_grok_translated_post": true,
        "responsive_web_grok_analysis_button_from_backend": true,
        "creator_subscriptions_quote_tweet_preview_enabled": true,
        "tweet_awards_web_tipping_enabled": true,
        "communities_web_enable_tweet_community_results_fetch": true,
        "responsive_web_edit_tweet_api_enabled": true,
        "c9s_tweet_anatomy_moderator_badge_enabled": true,
        "standardized_nudges_misinfo": true,
        "responsive_web_grok_analyze_post_followups_enabled": true,
        "freedom_of_speech_not_reach_fetch_enabled": true,
        "rweb_tipjar_consumption_enabled": true,
        "longform_notetweets_inline_media_enabled": true
    })
}

/// Appends the required feature flags to a GET URL if they are missing.
fn ensure_features_in_url(url: &str) -> String {
    if url.contains("features=") {
        // The URL already has query parameters including features; use as-is.
        return url.to_string();
    }

    let mut url_with_features = url.to_string();

    // Add a separator if needed.
    if !url_with_features.contains('?') {
        url_with_features.push('?');
    } else if !url_with_features.ends_with('&') && !url_with_features.ends_with('?') {
        url_with_features.push('&');
    }

    // Add the feature flags.
    url_with_features.push_str(
        "features=verified_phone_label_enabled,responsive_web_graphql_exclude_directive_enabled,\
        responsive_web_graphql_timeline_navigation_enabled,responsive_web_graphql_skip_user_profile_image_extensions_enabled,\
        creator_subscriptions_tweet_preview_api_enabled,responsive_web_enhance_cards_enabled,\
        view_counts_everywhere_api_enabled,longform_notetweets_consumption_enabled,\
        responsive_web_twitter_article_tweet_consumption_enabled,responsive_web_grok_image_annotation_enabled,\
        articles_preview_enabled,responsive_web_grok_analyze_button_fetch_trends_enabled,\
        premium_content_api_read_enabled,longform_notetweets_rich_text_read_enabled,\
        profile_label_improvements_pcf_label_in_post_enabled,tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled,\
        graphql_is_translatable_rweb_tweet_is_translatable_enabled,rweb_video_screen_enabled,\
        responsive_web_grok_show_grok_translated_post,responsive_web_grok_analysis_button_from_backend,\
        creator_subscriptions_quote_tweet_preview_enabled,tweet_awards_web_tipping_enabled,\
        communities_web_enable_tweet_community_results_fetch,responsive_web_edit_tweet_api_enabled,\
        c9s_tweet_anatomy_moderator_badge_enabled,standardized_nudges_misinfo,\
        responsive_web_grok_analyze_post_followups_enabled,freedom_of_speech_not_reach_fetch_enabled,\
        rweb_tipjar_consumption_enabled,longform_notetweets_inline_media_enabled"
    );

    url_with_features
}

/// Truncates a string to at most `max_chars` characters without splitting a
/// multi-byte UTF-8 character (which a byte slice could do, causing a panic).
fn safe_truncate(s: &str, max_chars: usize) -> String {
    if s.chars().count() <= max_chars {
        return s.to_string();
    }

    let mut result = String::new();
    let mut char_count = 0;

    for c in s.chars() {
        if char_count >= max_chars {
            break;
        }
        result.push(c);
        char_count += 1;
    }

    result + "..."
}

/// Dumps a raw API response to a file (used by the commented-out debug hooks).
#[allow(dead_code)]
fn save_response_to_file(response: &str, filename: &str) -> Result<()> {
    let path = Path::new(filename);
    let mut file = File::create(path)?;
    file.write_all(response.as_bytes())?;
    Ok(())
}

fn extract_cursor_from_url(url: &str) -> Option<String> {
    if let Some(query_part) = url.split('?').nth(1) {
        // Extract the variables parameter.
        for param in query_part.split('&') {
            if param.starts_with("variables=") {
                let variables_str = param.split('=').nth(1)?;

                // URL-decode the variables string.
                let decoded = urlencoding::decode(variables_str).ok()?;

                // Parse the JSON.
                if let Ok(variables) = serde_json::from_str::<serde_json::Value>(&decoded) {
                    // Extract the cursor value.
                    let cursor = variables
                        .get("cursor")
                        .and_then(|c| c.as_str())
                        .map(|s| s.to_string());

                    if cursor.is_some() {
                        println!("Found cursor in URL variables: {}", cursor.as_ref().unwrap());
                        return cursor;
                    }
                } else {
                    println!("Failed to parse variables JSON: {}", decoded);
                }
            }
        }
    }

    None
}

fn parse_twitter_url_params(url: &str) -> Option<(Option<serde_json::Value>, Option<serde_json::Value>)> {
    let mut variables = None;
    let mut features = None;

    if let Some(query_part) = url.split('?').nth(1) {
        // Process each query parameter.
        for param in query_part.split('&') {
            if param.starts_with("variables=") {
                if let Some(vars_str) = param.split('=').nth(1) {
                    if let Ok(decoded) = urlencoding::decode(vars_str) {
                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
                            variables = Some(json);
                        }
                    }
                }
            } else if param.starts_with("features=") {
                if let Some(feat_str) = param.split('=').nth(1) {
                    if let Ok(decoded) = urlencoding::decode(feat_str) {
                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
                            features = Some(json);
                        }
                    }
                }
            }
        }
    }

    if variables.is_some() || features.is_some() {
        Some((variables, features))
    } else {
        None
    }
}
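
The `safe_truncate` helper above exists because Rust string slices panic when cut off a UTF-8 character boundary; a two-line demonstration of the hazard it avoids:

```rust
fn main() {
    let s = "héllo"; // 'é' is a two-byte UTF-8 character
    // `&s[0..2]` would panic here: byte index 2 falls inside 'é'.
    let truncated: String = s.chars().take(2).collect();
    assert_eq!(truncated, "hé");
}
```
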
src/twitter/curl.rs (new file, 181 lines)
@@ -0,0 +1,181 @@
use anyhow::{anyhow, Result};
use regex::Regex;
use reqwest::header;

#[derive(Debug, Clone)]
pub struct CurlAuth {
    pub url: String,
    pub headers: header::HeaderMap,
    pub data: Option<String>,
    pub url_parameters: Option<serde_json::Value>,
}

impl CurlAuth {
    pub fn from_curl(curl_command: &str) -> Result<Self> {
        println!("Parsing curl command...");

        // Extract the URL using regex.
        let url_regex = Regex::new(r"curl\s+'([^']+)'")?;
        let url = match url_regex.captures(curl_command) {
            Some(captures) => {
                match captures.get(1) {
                    Some(url_match) => url_match.as_str().to_string(),
                    None => return Err(anyhow!("Could not extract URL from regex match"))
                }
            },
            None => return Err(anyhow!("Could not find URL in curl command: {}", curl_command))
        };

        println!("URL: {}", url);

        // Create a new HeaderMap.
        let mut headers = header::HeaderMap::new();

        // Extract headers using regex.
        let header_regex = Regex::new(r"-H\s+'([^']+)'")?;
        println!("Extracting headers...");
        for cap in header_regex.captures_iter(curl_command) {
            if let Some(header) = cap.get(1) {
                let header_str = header.as_str();
                if let Some((key, value)) = header_str.split_once(':') {
                    let key = key.trim();
                    let value = value.trim();

                    println!("Header: {} = {}", key, value);

                    // Parse the header name and value.
                    if let Ok(header_name) = key.parse::<header::HeaderName>() {
                        if let Ok(header_value) = value.parse::<header::HeaderValue>() {
                            headers.insert(header_name, header_value);
                        } else {
                            println!("Warning: Could not parse header value: {}", value);
                        }
                    } else {
                        println!("Warning: Could not parse header name: {}", key);
                    }
                }
            }
        }

        // Extract cookies using regex and add them to the headers.
        let cookie_regex = Regex::new(r"-b\s+'([^']+)'")?;
        if let Some(captures) = cookie_regex.captures(curl_command) {
            if let Some(cookies_str) = captures.get(1) {
                println!("Found cookies: {}", cookies_str.as_str());
                if let Ok(cookie_value) = cookies_str.as_str().parse::<header::HeaderValue>() {
                    headers.insert(header::COOKIE, cookie_value);
                } else {
                    println!("Warning: Could not parse cookie value");
                }
            }
        }

        // Extract the request body, trying the different curl flags in turn.
        let mut data = None;

        // First try --data-raw.
        let data_raw_regex = Regex::new(r"--data-raw\s+'([^']+)'")?;
        if let Some(captures) = data_raw_regex.captures(curl_command) {
            if let Some(data_match) = captures.get(1) {
                data = Some(data_match.as_str().to_string());
                println!("Found data-raw: {}", data.as_ref().unwrap());
            }
        }

        // If not found, try --data.
        if data.is_none() {
            let data_regex = Regex::new(r"--data\s+'([^']+)'")?;
            if let Some(captures) = data_regex.captures(curl_command) {
                if let Some(data_match) = captures.get(1) {
                    data = Some(data_match.as_str().to_string());
                    println!("Found data: {}", data.as_ref().unwrap());
                }
            }
        }

        // If still not found, try -d.
        if data.is_none() {
            let d_regex = Regex::new(r"-d\s+'([^']+)'")?;
            if let Some(captures) = d_regex.captures(curl_command) {
                if let Some(data_match) = captures.get(1) {
                    data = Some(data_match.as_str().to_string());
                    println!("Found -d: {}", data.as_ref().unwrap());
                }
            }
        }

        // Extract URL parameters if present.
        let url_parameters = if url.contains('?') {
            extract_url_parameters(&url)
        } else {
            None
        };

        // If we have URL parameters but no body, create the body from them.
        if data.is_none() && url_parameters.is_some() {
            let params = url_parameters.as_ref().unwrap();
            println!("Creating data from URL parameters");
            data = Some(serde_json::to_string(params).unwrap_or_default());
            println!("Created data: {}", data.as_ref().unwrap());
        }

        // Check for essential auth headers/cookies.
        if !headers.contains_key(header::COOKIE) {
            println!("Warning: Missing cookie header");
        }

        println!("CurlAuth created with URL and {} headers", headers.len());
        println!("=== URL ===");
        println!("{}", url);
        println!("=== Headers ===");
        for (name, value) in headers.iter() {
            println!("  {}: {}", name, value.to_str().unwrap_or("[binary value]"));
        }
        println!("=== Data ===");
        if let Some(data_str) = &data {
            println!("{}", data_str);
        } else {
            println!("  [No data]");
        }
        println!("============");

        Ok(Self {
            url,
            headers,
            data,
            url_parameters,
        })
    }
}

fn extract_url_parameters(url: &str) -> Option<serde_json::Value> {
    if let Some(query_part) = url.split('?').nth(1) {
        let mut result = serde_json::Map::new();

        // Process each query parameter.
        for param in query_part.split('&') {
            let parts: Vec<&str> = param.split('=').collect();
            if parts.len() == 2 {
                let key = parts[0];
                let value = parts[1];

                if key == "variables" || key == "features" {
                    if let Ok(decoded) = urlencoding::decode(value) {
                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
                            result.insert(key.to_string(), json);
                        }
                    }
                }
            }
        }

        if !result.is_empty() {
            return Some(serde_json::Value::Object(result));
        }
    }

    None
}
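
A quick check of the URL-extraction pattern above, using the same regex `CurlAuth::from_curl` compiles and the GraphQL endpoint referenced in `src/twitter/client.rs`:

```rust
use regex::Regex;

fn main() {
    // Pull the single-quoted URL out of a captured curl command.
    let re = Regex::new(r"curl\s+'([^']+)'").unwrap();
    let cmd = "curl 'https://x.com/i/api/graphql/nMyTQqsJiUGBKLGNSQamAA/HomeLatestTimeline' -H 'x-twitter-auth-type: OAuth2Session'";
    let caps = re.captures(cmd).unwrap();
    assert!(caps[1].ends_with("/HomeLatestTimeline"));
}
```
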
src/twitter/mod.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
pub mod client;
pub mod curl;

pub use client::TwitterClient;
pub use curl::CurlAuth;