188 lines
4.5 KiB
Go
188 lines
4.5 KiB
Go
package monitor
|
|
|
|
import (
|
|
"backend/Type"
|
|
"backend/client"
|
|
"backend/middleware/alibaba"
|
|
"backend/model"
|
|
"backend/msg"
|
|
"backend/util"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"net"
|
|
"time"
|
|
|
|
"google.golang.org/protobuf/encoding/protojson"
|
|
)
|
|
|
|
func UserAliveMonitor(AppId int) {
|
|
for {
|
|
curCount, yCount, err := util.CountDistinctUidLastHour()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if yCount > 0 {
|
|
drop := float64(yCount-curCount) / float64(yCount)
|
|
str := `
|
|
# **游戏数据监控异常**
|
|
- 项目名称: meowment
|
|
- 监控项名称: 用户存活监控<br/>
|
|
-------------------------
|
|
监控时间段: %s ~ %s<br/>
|
|
昨日活跃用户数: %d<br/>
|
|
当前活跃用户数: %d<br/>
|
|
用户流失率: **%.2f%%**<br/>
|
|
<a>@所有人</a>
|
|
`
|
|
if drop >= 0.3 && (yCount-curCount) >= int64(10) {
|
|
alibaba.SendAliveMsg("服务器报警", fmt.Sprintf(str,
|
|
time.Now().Add(-time.Hour).Format("2006-01-02 15:04:05"),
|
|
time.Now().Format("2006-01-02 15:04:05"),
|
|
yCount,
|
|
curCount,
|
|
drop*100), "red")
|
|
}
|
|
}
|
|
|
|
time.Sleep(time.Until(time.Now().Truncate(time.Hour).Add(time.Hour)))
|
|
}
|
|
}
|
|
|
|
func ServerInfoMonitor() {
|
|
for {
|
|
now := time.Now()
|
|
next := now.Truncate(1 * time.Minute).Add(1 * time.Minute)
|
|
time.Sleep(time.Until(next))
|
|
monitorServerInfo()
|
|
}
|
|
}
|
|
|
|
func monitorServerInfo() {
|
|
db := util.MPool.GetGameDB()
|
|
var server []*Type.ServerInfo
|
|
defer db.Close()
|
|
err := db.Select(&server, "SELECT `AppId`, `ServerId`, `ServerName`, `Status`, `CreateTime`, `OpenServerTime`, `Host`, `Port` FROM server ")
|
|
if err != nil {
|
|
return
|
|
}
|
|
for _, v := range server {
|
|
if v.Status == 2 || v.Status == 3 { // 维护中或停用跳过
|
|
continue
|
|
}
|
|
go func(v *Type.ServerInfo) {
|
|
if err := monitorServerByFallback(v); err != nil {
|
|
log.Printf("monitor server failed, AppId=%d, ServerId=%d, err=%v", v.AppId, v.ServerId, err)
|
|
markServerOffline(v.AppId, v.ServerId)
|
|
}
|
|
}(v)
|
|
}
|
|
}
|
|
|
|
func monitorServerByFallback(v *Type.ServerInfo) error {
|
|
// 默认优先使用第一种方式;失败后自动切换第二种,再反向回退第一种重试一次。
|
|
firstErr := monitorServerByMethod1(v)
|
|
if firstErr == nil {
|
|
return nil
|
|
}
|
|
|
|
secondErr := monitorServerByMethod2(v)
|
|
if secondErr == nil {
|
|
return nil
|
|
}
|
|
|
|
if err := monitorServerByMethod1(v); err == nil {
|
|
return nil
|
|
}
|
|
|
|
return fmt.Errorf("method1 and method2 both failed, method1Err=%v, method2Err=%v", firstErr, secondErr)
|
|
}
|
|
|
|
func monitorServerByMethod1(v *Type.ServerInfo) error {
|
|
resp, err := client.GetServerInfo(v.AppId, v.ServerId, &msg.ReqServerInfo{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
latency, err := util.GetAddressLatency(v.Host, v.Port)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
tmpDb := util.MPool.GetGameDB()
|
|
defer tmpDb.Close()
|
|
weight := util.GetServerWeight(resp)
|
|
extra, _ := protojson.Marshal(resp)
|
|
_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=?,extra=? where AppId=? and ServerId=?", resp.PlayerNum, resp.Sys, resp.CPU, weight, latency, string(extra), v.AppId, v.ServerId)
|
|
return err
|
|
}
|
|
|
|
func monitorServerByMethod2(v *Type.ServerInfo) error {
|
|
address := fmt.Sprintf("%s:%d", v.Host, v.Port)
|
|
timeout := 3 * time.Second
|
|
start := time.Now()
|
|
conn, err := net.DialTimeout("tcp", address, timeout)
|
|
latency := time.Since(start).Milliseconds()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
conn.Close()
|
|
|
|
res, err := model.GetServerInfo(v.AppId, v.ServerId)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
serverInfo, ok := res.(map[string]interface{})
|
|
if !ok {
|
|
return errors.New("invalid server info response type")
|
|
}
|
|
|
|
freeMem := util.Int(serverInfo["FreeMem"])
|
|
usageMem := util.Int(serverInfo["Sys"])
|
|
cpu := util.Int(serverInfo["CPU"])
|
|
weight := calcServerWeight(freeMem, cpu)
|
|
|
|
tmpDb := util.MPool.GetGameDB()
|
|
defer tmpDb.Close()
|
|
_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=? where AppId=? and ServerId=?", util.Int(serverInfo["PlayerNum"]), usageMem, cpu, weight, latency, v.AppId, v.ServerId)
|
|
return err
|
|
}
|
|
|
|
// calcServerWeight combines free memory and CPU usage into a single score in
// the range [0, 100]; a higher score means more spare capacity.
//
// freeMem is scored linearly against a reference cap of 8192 (negative values
// count as 0, values at or above the cap score 100). cpu is clamped to
// [0, 100] and scored as 100 - cpu. The result is the integer weighted
// average of the two scores, with memory weighted 6 and CPU weighted 4.
//
// Because both scores are already within [0, 100], the weighted average is
// too, so no further clamping of the result is needed.
func calcServerWeight(freeMem int, cpu int) int {
	const (
		memCap    = 8192 // 8GB as reference cap for normalization
		maxScore  = 100
		memFactor = 6
		cpuFactor = 4
	)

	memScore := maxScore
	switch {
	case freeMem <= 0:
		memScore = 0
	case freeMem < memCap:
		memScore = freeMem * maxScore / memCap
	}

	switch {
	case cpu < 0:
		cpu = 0
	case cpu > maxScore:
		cpu = maxScore
	}
	cpuScore := maxScore - cpu

	return (memScore*memFactor + cpuScore*cpuFactor) / (memFactor + cpuFactor)
}
|
|
|
|
func markServerOffline(appID int, serverID int) {
|
|
tmpDb := util.MPool.GetGameDB()
|
|
defer tmpDb.Close()
|
|
_, _ = tmpDb.Exec("update server set Status=0 where AppId=? and ServerId=?", appID, serverID)
|
|
}
|