admin_backend/monitor/Monitor.go
2026-05-07 14:39:24 +08:00

188 lines
4.5 KiB
Go

package monitor
import (
"backend/Type"
"backend/client"
"backend/middleware/alibaba"
"backend/model"
"backend/msg"
"backend/util"
"errors"
"fmt"
"log"
"net"
"time"
"google.golang.org/protobuf/encoding/protojson"
)
// UserAliveMonitor runs forever and periodically compares the two user counts
// returned by util.CountDistinctUidLastHour. The alert text labels yCount as
// yesterday's active users and curCount as the current active users.
//
// An alert is sent through alibaba.SendAliveMsg when the drop ratio
// (yCount-curCount)/yCount is at least 30% AND the absolute drop is at least
// 10 users. After each successful check the loop sleeps until the next
// whole-hour boundary.
//
// AppId is not read by this function; it is kept so existing callers compile.
func UserAliveMonitor(AppId int) {
	const (
		timeLayout = "2006-01-02 15:04:05"
		// retryDelay is how long to wait after a failed count before trying
		// again. Without a delay a persistent failure turns the loop into a
		// busy spin against the database.
		retryDelay = time.Minute
	)
	// Alert body (markdown). Verbs, in order: window start, window end,
	// yCount, curCount, drop percentage.
	const alertTemplate = `
# **游戏数据监控异常**
- 项目名称: meowment
- 监控项名称: 用户存活监控<br/>
-------------------------
监控时间段: %s ~ %s<br/>
昨日活跃用户数: %d<br/>
当前活跃用户数: %d<br/>
用户流失率: **%.2f%%**<br/>
<a>@所有人</a>
`
	for {
		curCount, yCount, err := util.CountDistinctUidLastHour()
		if err != nil {
			log.Printf("user alive monitor: counting distinct uids failed: %v", err)
			time.Sleep(retryDelay)
			continue
		}

		// yCount == 0 would make the ratio undefined, so skip the check.
		if yCount > 0 {
			lost := yCount - curCount
			drop := float64(lost) / float64(yCount)
			// Require both a relative and an absolute drop so that tiny
			// populations do not trigger alerts.
			if drop >= 0.3 && lost >= int64(10) {
				now := time.Now()
				body := fmt.Sprintf(alertTemplate,
					now.Add(-time.Hour).Format(timeLayout),
					now.Format(timeLayout),
					yCount,
					curCount,
					drop*100)
				alibaba.SendAliveMsg("服务器报警", body, "red")
			}
		}

		// Sleep until the start of the next hour.
		time.Sleep(time.Until(time.Now().Truncate(time.Hour).Add(time.Hour)))
	}
}
// ServerInfoMonitor loops forever: each iteration sleeps until the next
// whole-minute boundary and then runs one monitorServerInfo pass.
func ServerInfoMonitor() {
	for {
		// Round the current time down to the minute, then step one minute
		// forward to get the upcoming boundary.
		boundary := time.Now().Truncate(time.Minute).Add(time.Minute)
		time.Sleep(time.Until(boundary))

		monitorServerInfo()
	}
}
// monitorServerInfo loads all rows from the server table and starts one
// goroutine per eligible server to probe it via monitorServerByFallback.
// A server whose probe fails is logged and marked offline.
//
// The goroutines are fire-and-forget: this function returns without waiting
// for them.
func monitorServerInfo() {
	db := util.MPool.GetGameDB()
	defer db.Close()

	var servers []*Type.ServerInfo
	err := db.Select(&servers, "SELECT `AppId`, `ServerId`, `ServerName`, `Status`, `CreateTime`, `OpenServerTime`, `Host`, `Port` FROM server ")
	if err != nil {
		// Surface the failure instead of silently skipping the whole pass.
		log.Printf("monitor server info: loading server list failed: %v", err)
		return
	}

	for _, srv := range servers {
		// Skip servers that are under maintenance or disabled (Status 2 or 3).
		if srv.Status == 2 || srv.Status == 3 {
			continue
		}
		go func(srv *Type.ServerInfo) {
			if err := monitorServerByFallback(srv); err != nil {
				log.Printf("monitor server failed, AppId=%d, ServerId=%d, err=%v", srv.AppId, srv.ServerId, err)
				markServerOffline(srv.AppId, srv.ServerId)
			}
		}(srv)
	}
}
// monitorServerByFallback probes one server using up to three attempts:
// method 1 first, then method 2 if that fails, then method 1 once more.
// It returns nil as soon as any attempt succeeds. If all fail, the returned
// error reports the errors from the first method-1 attempt and the method-2
// attempt (the retry's error is not included).
func monitorServerByFallback(v *Type.ServerInfo) error {
	method1Err := monitorServerByMethod1(v)
	if method1Err == nil {
		return nil
	}

	method2Err := monitorServerByMethod2(v)
	switch {
	case method2Err == nil:
		return nil
	case monitorServerByMethod1(v) == nil:
		// Final fallback: one more try with method 1 succeeded.
		return nil
	}

	return fmt.Errorf("method1 and method2 both failed, method1Err=%v, method2Err=%v", method1Err, method2Err)
}
// monitorServerByMethod1 probes a server by requesting its info through
// client.GetServerInfo and measuring latency with util.GetAddressLatency,
// then writes the result to the server row (Status=1, Online, free_mem, cpu,
// weight, latency, extra).
//
// It returns a non-nil error if the info request, the latency measurement, or
// the database update fails. Errors are wrapped with the failing step so the
// caller's log shows where the probe broke.
func monitorServerByMethod1(v *Type.ServerInfo) error {
	resp, err := client.GetServerInfo(v.AppId, v.ServerId, &msg.ReqServerInfo{})
	if err != nil {
		return fmt.Errorf("get server info: %w", err)
	}
	latency, err := util.GetAddressLatency(v.Host, v.Port)
	if err != nil {
		return fmt.Errorf("measure latency: %w", err)
	}

	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()

	weight := util.GetServerWeight(resp)

	// extra stores the whole response as JSON. A marshal failure is not
	// treated as a probe failure (the server did answer), but it is logged
	// rather than silently dropped.
	extra, err := protojson.Marshal(resp)
	if err != nil {
		log.Printf("monitor server: marshal server info failed, AppId=%d, ServerId=%d, err=%v", v.AppId, v.ServerId, err)
	}

	// Note: the free_mem column is filled from resp.Sys, as in the original.
	_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=?,extra=? where AppId=? and ServerId=?", resp.PlayerNum, resp.Sys, resp.CPU, weight, latency, string(extra), v.AppId, v.ServerId)
	if err != nil {
		return fmt.Errorf("update server row: %w", err)
	}
	return nil
}
// monitorServerByMethod2 probes a server by opening a TCP connection to
// Host:Port (3 second timeout) and timing the dial, then reading the server's
// stats through model.GetServerInfo and writing them to the server row
// (Status=1, Online, free_mem, cpu, weight, latency).
//
// It returns a non-nil error if the dial fails, the stats lookup fails, the
// stats are not a map[string]interface{}, or the database update fails.
func monitorServerByMethod2(v *Type.ServerInfo) error {
	// net.JoinHostPort brackets IPv6 literals, which a plain "%s:%d" format
	// does not, so IPv6 hosts produce a dialable address.
	// NOTE(review): assumes Host is stored without surrounding brackets — confirm.
	address := net.JoinHostPort(v.Host, fmt.Sprint(v.Port))
	timeout := 3 * time.Second

	start := time.Now()
	conn, err := net.DialTimeout("tcp", address, timeout)
	latency := time.Since(start).Milliseconds()
	if err != nil {
		return fmt.Errorf("dial %s: %w", address, err)
	}
	// The connection is only a reachability probe; close it right away.
	conn.Close()

	res, err := model.GetServerInfo(v.AppId, v.ServerId)
	if err != nil {
		return fmt.Errorf("load server info: %w", err)
	}
	serverInfo, ok := res.(map[string]interface{})
	if !ok {
		return errors.New("invalid server info response type")
	}

	freeMem := util.Int(serverInfo["FreeMem"])
	usageMem := util.Int(serverInfo["Sys"])
	cpu := util.Int(serverInfo["CPU"])
	// The weight is derived from FreeMem, while the free_mem column below is
	// filled from Sys, as in the original.
	weight := calcServerWeight(freeMem, cpu)

	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()
	_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=? where AppId=? and ServerId=?", util.Int(serverInfo["PlayerNum"]), usageMem, cpu, weight, latency, v.AppId, v.ServerId)
	if err != nil {
		return fmt.Errorf("update server row: %w", err)
	}
	return nil
}
// calcServerWeight combines free memory and CPU usage into a score in [0, 100].
//
// freeMem is clamped to [0, 8192] and scaled to a 0-100 memory score (8192 is
// the reference cap; the original comment calls it 8GB). cpu is clamped to
// [0, 100] and inverted, so lower usage scores higher. The result is the
// 60/40 weighted mean of the memory and CPU scores, using integer division.
func calcServerWeight(freeMem int, cpu int) int {
	const (
		memCap    = 8192
		memFactor = 6
		cpuFactor = 4
	)

	// clamp limits val to the inclusive range [lo, hi].
	clamp := func(val, lo, hi int) int {
		switch {
		case val < lo:
			return lo
		case val > hi:
			return hi
		}
		return val
	}

	memScore := clamp(freeMem, 0, memCap) * 100 / memCap
	cpuScore := 100 - clamp(cpu, 0, 100)

	return clamp((memScore*memFactor+cpuScore*cpuFactor)/10, 0, 100)
}
// markServerOffline sets Status=0 on the server row identified by appID and
// serverID. The update is best-effort: a failure is logged and not returned,
// so the caller's monitoring pass is unaffected.
func markServerOffline(appID int, serverID int) {
	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()

	if _, err := tmpDb.Exec("update server set Status=0 where AppId=? and ServerId=?", appID, serverID); err != nil {
		log.Printf("mark server offline failed, AppId=%d, ServerId=%d, err=%v", appID, serverID, err)
	}
}