291 lines
7.4 KiB
Go
291 lines
7.4 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// errDataMissing is returned when a response was fetched and decoded but does
// not contain the field (or field type) the caller was looking for.
var errDataMissing = errors.New("data missing from response")

// errDomainFailed is returned by checkBadConn once a domain's bad-response
// counter has climbed above the allowed threshold.
var errDomainFailed = errors.New("domain failed too many times")
|
|
|
|
type DomainWorker struct {
|
|
hub *WorkerHub
|
|
mu sync.RWMutex
|
|
rateLimit time.Duration
|
|
lastReq time.Time
|
|
domain string
|
|
userAgent string
|
|
httpTimeout time.Duration
|
|
badConnCount uint
|
|
badConnLast time.Time
|
|
badConnCooldown time.Duration
|
|
}
|
|
|
|
func NewDomainWorker(hub *WorkerHub, domain string, userAgent string, rateLimit uint, httpTimeout uint) *DomainWorker {
|
|
log.Info().Str("domain", domain).Msg("NewDomainWorker")
|
|
return &DomainWorker{
|
|
hub: hub,
|
|
rateLimit: time.Millisecond * time.Duration(rateLimit),
|
|
domain: domain,
|
|
userAgent: userAgent,
|
|
httpTimeout: time.Duration(httpTimeout) * time.Second,
|
|
badConnCooldown: time.Minute * 4,
|
|
}
|
|
}
|
|
|
|
func (o *DomainWorker) GetUserID(statusID string) (string, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
data, err := o.GetStatus(statusID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
account, ok := data["account"].(map[string]interface{})
|
|
if !ok {
|
|
return "", errDataMissing
|
|
}
|
|
userID, ok := account["id"].(string)
|
|
if !ok {
|
|
return "", errDataMissing
|
|
}
|
|
return userID, nil
|
|
}
|
|
|
|
func (o *DomainWorker) GetAccount(userID string) (map[string]interface{}, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
body, err := o.getHTTPBody(fmt.Sprintf("https://%s/api/v1/accounts/%s", o.domain, userID))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data := map[string]interface{}{}
|
|
err = json.Unmarshal(body, &data)
|
|
return data, err
|
|
}
|
|
|
|
func (o *DomainWorker) GetStatus(statusID string) (map[string]interface{}, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
url := fmt.Sprintf("https://%s/api/v1/statuses/%s", o.domain, statusID)
|
|
body, err := o.getHTTPBody(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data := map[string]interface{}{}
|
|
err = json.Unmarshal(body, &data)
|
|
return data, err
|
|
}
|
|
|
|
func (o *DomainWorker) GetTimeline(userID string, params string) ([]map[string]interface{}, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
url := fmt.Sprintf("https://%s/api/v1/accounts/%s/statuses%s", o.domain, userID, params)
|
|
log.Info().Str("URL", url).Msg("GetTimeline")
|
|
body, err := o.getHTTPBody(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data := []map[string]interface{}{}
|
|
err = json.Unmarshal(body, &data)
|
|
return data, err
|
|
}
|
|
|
|
func (o *DomainWorker) GetInstance() (map[string]interface{}, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
body, err := o.getHTTPBody(fmt.Sprintf("https://%s/api/v1/instance", o.domain))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data := map[string]interface{}{}
|
|
err = json.Unmarshal(body, &data)
|
|
return data, err
|
|
}
|
|
|
|
func (o *DomainWorker) FindUserID(username string) (string, error) {
|
|
log.Debug().Str("username", username).Msg("FindUserID")
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
/* Pleroma will just resolve usernames to IDs, BASED */
|
|
acct, err := o.GetAccount(username)
|
|
if err == nil {
|
|
userID, ok := acct["id"].(string)
|
|
if ok {
|
|
return userID, nil
|
|
}
|
|
}
|
|
|
|
/* Gab implemented a REST call for resolving usernames to IDs */
|
|
acct, err = o.GetAccountByUsername(username)
|
|
if err == nil {
|
|
userID, ok := acct["id"].(string)
|
|
if ok {
|
|
return userID, nil
|
|
}
|
|
}
|
|
|
|
/* Mastodon, engage the hacky scraper */
|
|
url := fmt.Sprintf("https://%s/@%s", o.domain, username)
|
|
doc, err := o.getGoQueryDoc(url)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
/* Look for salmon link (older Mastodon?) */
|
|
var userID string
|
|
doc.Find("link").Each(func(i int, s *goquery.Selection) {
|
|
attr, exists := s.Attr("rel")
|
|
if exists && attr == "salmon" {
|
|
attr, exists = s.Attr("href")
|
|
if exists {
|
|
splitStr := strings.Split(attr, "/")
|
|
userID = splitStr[len(splitStr)-1]
|
|
return
|
|
}
|
|
}
|
|
})
|
|
if len(userID) > 0 {
|
|
return userID, nil
|
|
}
|
|
/* Pull from statuses */
|
|
var statusID string
|
|
doc.Find("div.entry.h-entry div.status div.status__info a.status__relative-time").Each(func(i int, selEntry *goquery.Selection) {
|
|
attr, exists := selEntry.Attr("href")
|
|
if exists && strings.Contains(attr, username) {
|
|
splitStr := strings.Split(attr, "/")
|
|
statusID = splitStr[len(splitStr)-1]
|
|
return
|
|
}
|
|
})
|
|
if len(statusID) > 0 {
|
|
status, err := o.GetStatus(statusID)
|
|
if err == nil {
|
|
account, ok := status["account"].(map[string]interface{})
|
|
if ok {
|
|
userID, ok = account["id"].(string)
|
|
if ok {
|
|
return userID, nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return "", errDataMissing
|
|
}
|
|
|
|
// GetAccountByUsername gets an account object by username (Gab-fork only)
|
|
func (o *DomainWorker) GetAccountByUsername(username string) (map[string]interface{}, error) {
|
|
err := o.checkBadConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
url := fmt.Sprintf("https://%s/api/v1/account_by_username/%s", o.domain, username)
|
|
body, err := o.getHTTPBody(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data := map[string]interface{}{}
|
|
err = json.Unmarshal(body, &data)
|
|
return data, err
|
|
}
|
|
|
|
func (o *DomainWorker) getHTTPBody(url string) ([]byte, error) {
|
|
o.mu.Lock()
|
|
defer o.mu.Unlock()
|
|
diff := time.Now().Sub(o.lastReq)
|
|
if diff < o.rateLimit {
|
|
sleepDur := o.rateLimit - diff
|
|
log.Debug().Str("domain", o.domain).Dur("sleep", sleepDur).Msg("Ratelimit Hit")
|
|
time.Sleep(sleepDur)
|
|
log.Debug().Str("domain", o.domain).Dur("sleep", sleepDur).Msg("Ratelimit Resume")
|
|
}
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", o.userAgent)
|
|
client := &http.Client{Timeout: o.httpTimeout}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != 200 {
|
|
if resp.StatusCode == 401 || resp.StatusCode == 403 || resp.StatusCode == 410 || resp.StatusCode == 500 {
|
|
o.badConnCount++
|
|
o.badConnLast = time.Now()
|
|
}
|
|
return nil, errors.New("HTTP Status: " + resp.Status)
|
|
}
|
|
o.lastReq = time.Now()
|
|
return ioutil.ReadAll(resp.Body)
|
|
}
|
|
|
|
func (o *DomainWorker) getGoQueryDoc(url string) (*goquery.Document, error) {
|
|
o.mu.Lock()
|
|
defer o.mu.Unlock()
|
|
diff := time.Now().Sub(o.lastReq)
|
|
if diff < o.rateLimit {
|
|
sleepDur := o.rateLimit - diff
|
|
log.Debug().Str("domain", o.domain).Dur("sleep", sleepDur).Msg("Ratelimit Hit")
|
|
time.Sleep(sleepDur)
|
|
log.Debug().Str("domain", o.domain).Dur("sleep", sleepDur).Msg("Ratelimit Resume")
|
|
}
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", o.userAgent)
|
|
client := &http.Client{Timeout: o.httpTimeout}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != 200 {
|
|
if resp.StatusCode == 401 || resp.StatusCode == 403 || resp.StatusCode == 410 || resp.StatusCode == 500 {
|
|
o.badConnCount++
|
|
o.badConnLast = time.Now()
|
|
}
|
|
return nil, errors.New("HTTP Status: " + resp.Status)
|
|
}
|
|
o.lastReq = time.Now()
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (o *DomainWorker) checkBadConn() error {
|
|
if o.badConnCount > 0 && time.Now().Sub(o.badConnLast) > o.badConnCooldown {
|
|
log.Debug().Str("domain", o.domain).Msg("removed count")
|
|
o.mu.Lock()
|
|
o.badConnCount--
|
|
o.mu.Unlock()
|
|
}
|
|
if o.badConnCount > 3 { // skipping mutex lock because race condition insensitive
|
|
return errDomainFailed
|
|
}
|
|
return nil
|
|
}
|