// Package monitor runs background checks on user activity and game-server
// health and writes the results to the game database.
package monitor

import (
	"errors"
	"fmt"
	"net"
	"time"

	"backend/Type"
	"backend/client"
	"backend/middleware/alibaba"
	"backend/model"
	"backend/msg"
	"backend/util"

	"google.golang.org/protobuf/encoding/protojson"
)

// UserAliveMonitor loops forever, reading the two counts returned by
// util.CountDistinctUidLastHour (reported in the alert as the current and
// yesterday's active users) and sending an alert through
// alibaba.SendAliveMsg when the count dropped by at least 30% and by at
// least 10 users.
//
// After a successful check it sleeps until the next hour boundary. When the
// count query fails it waits retryDelay and tries again instead of retrying
// immediately in a tight loop.
//
// AppId is currently unused; it is kept so existing callers keep compiling.
func UserAliveMonitor(AppId int) {
	const (
		timeLayout   = "2006-01-02 15:04:05"
		retryDelay   = time.Minute // back-off after a failed count query
		dropRatio    = 0.3         // minimum relative drop that triggers an alert
		minLostUsers = 10          // minimum absolute drop that triggers an alert

		// alertTemplate is the message body; it is emitted at runtime and
		// must not be reformatted.
		alertTemplate = ` # **游戏数据监控异常** - 项目名称: meowment - 监控项名称: 用户存活监控
------------------------- 监控时间段: %s ~ %s
昨日活跃用户数: %d
当前活跃用户数: %d
用户流失率: **%.2f%%**
@所有人 `
	)

	for {
		curCount, yCount, err := util.CountDistinctUidLastHour()
		if err != nil {
			// Back off before retrying so a persistent failure does not
			// turn into a busy loop.
			time.Sleep(retryDelay)
			continue
		}

		// A zero baseline would make the ratio meaningless (division by zero).
		if yCount > 0 {
			lost := yCount - curCount
			drop := float64(lost) / float64(yCount)
			if drop >= dropRatio && lost >= minLostUsers {
				// Use one timestamp so both ends of the window are consistent.
				now := time.Now()
				body := fmt.Sprintf(alertTemplate,
					now.Add(-time.Hour).Format(timeLayout),
					now.Format(timeLayout),
					yCount, curCount, drop*100)
				alibaba.SendAliveMsg("服务器报警", body, "red")
			}
		}

		// Sleep until the start of the next hour.
		time.Sleep(time.Until(time.Now().Truncate(time.Hour).Add(time.Hour)))
	}
}

// ServerInfoMonitor loops forever, running monitorServerInfo once every ten
// seconds (the first run happens after the initial ten-second sleep).
func ServerInfoMonitor() {
	const interval = 10 * time.Second
	for {
		time.Sleep(interval)
		monitorServerInfo()
	}
}

// monitorServerInfo loads every row of the server table and starts one
// goroutine per server to probe it. Servers with Status 2 or 3 (under
// maintenance or disabled) are skipped. A server whose probe fails is marked
// offline. The function does not wait for the probes to finish.
func monitorServerInfo() {
	db := util.MPool.GetGameDB()
	defer db.Close()

	var servers []*Type.ServerInfo
	err := db.Select(&servers, "SELECT `AppId`, `ServerId`, `ServerName`, `Status`, `CreateTime`, `OpenServerTime`, `Host`, `Port` FROM server ")
	if err != nil {
		// Nothing to probe this round; the caller retries on its next tick.
		return
	}

	for _, srv := range servers {
		if srv.Status == 2 || srv.Status == 3 {
			// Skip servers that are under maintenance or disabled.
			continue
		}
		// Pass the pointer as an argument so each goroutine has its own copy
		// regardless of the Go version's loop-variable semantics.
		go func(s *Type.ServerInfo) {
			if err := monitorServerByFallback(s); err != nil {
				markServerOffline(s.AppId, s.ServerId)
			}
		}(srv)
	}
}

// monitorServerByFallback probes a server with method 1 first; if that fails
// it switches to method 2, and if that also fails it falls back to method 1
// for one more attempt. It returns nil as soon as any attempt succeeds,
// otherwise an error carrying the first method-1 error and the method-2 error.
func monitorServerByFallback(v *Type.ServerInfo) error {
	firstErr := monitorServerByMethod1(v)
	if firstErr == nil {
		return nil
	}

	secondErr := monitorServerByMethod2(v)
	if secondErr == nil {
		return nil
	}

	// One last retry of method 1 before giving up.
	if monitorServerByMethod1(v) == nil {
		return nil
	}
	return fmt.Errorf("method1 and method2 both failed, method1Err=%v, method2Err=%v", firstErr, secondErr)
}

// monitorServerByMethod1 queries the server through client.GetServerInfo,
// measures latency with util.GetAddressLatency, and stores the result
// (Status=1 plus online count, memory, CPU, weight, latency and the
// JSON-encoded response) in the server table.
//
// It returns the first error encountered, or an error when the response is nil.
func monitorServerByMethod1(v *Type.ServerInfo) error {
	resp, err := client.GetServerInfo(v.AppId, v.ServerId, &msg.ReqServerInfo{})
	if err != nil {
		return err
	}
	if resp == nil {
		// Guard against a nil response: dereferencing it below would panic
		// inside a goroutine and take the whole process down.
		return errors.New("empty server info response")
	}

	latency, err := util.GetAddressLatency(v.Host, v.Port)
	if err != nil {
		return err
	}

	weight := util.GetServerWeight(resp)
	// Best effort: the extra column is informational, so a marshalling
	// failure stores an empty string instead of failing the health check.
	extra, _ := protojson.Marshal(resp)

	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()

	_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=?,extra=? where AppId=? and ServerId=?",
		resp.PlayerNum, resp.Sys, resp.CPU, weight, latency, string(extra), v.AppId, v.ServerId)
	return err
}

// monitorServerByMethod2 checks that the server accepts a TCP connection
// within three seconds (the elapsed dial time in milliseconds is recorded as
// latency), then reads the server info via model.GetServerInfo and stores
// Status=1 plus online count, memory, CPU, weight and latency in the server
// table.
//
// It returns an error when the dial fails, the info lookup fails, the result
// is not a map[string]interface{}, or the update fails.
func monitorServerByMethod2(v *Type.ServerInfo) error {
	const dialTimeout = 3 * time.Second

	address := fmt.Sprintf("%s:%d", v.Host, v.Port)
	start := time.Now()
	conn, err := net.DialTimeout("tcp", address, dialTimeout)
	latency := time.Since(start).Milliseconds()
	if err != nil {
		return err
	}
	// Only reachability matters; the connection itself is not used.
	conn.Close()

	res, err := model.GetServerInfo(v.AppId, v.ServerId)
	if err != nil {
		return err
	}
	serverInfo, ok := res.(map[string]interface{})
	if !ok {
		return errors.New("invalid server info response type")
	}

	freeMem := util.Int(serverInfo["FreeMem"])
	// NOTE(review): the free_mem column receives "Sys", not "FreeMem";
	// method 1 does the same with resp.Sys. Confirm this is intended.
	usageMem := util.Int(serverInfo["Sys"])
	cpu := util.Int(serverInfo["CPU"])
	online := util.Int(serverInfo["PlayerNum"])
	weight := calcServerWeight(freeMem, cpu)

	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()

	_, err = tmpDb.Exec("update server set Status=1, Online=?,free_mem=?,cpu=?,weight=?,latency=? where AppId=? and ServerId=?",
		online, usageMem, cpu, weight, latency, v.AppId, v.ServerId)
	return err
}

// calcServerWeight combines free memory and CPU usage into a weight in the
// range 0..100, where a higher value means more free capacity.
//
// freeMem is normalised against a cap of 8192 (negative values count as 0,
// values at or above the cap score 100); presumably the unit is MB, since
// the cap is described as 8GB. cpu is clamped to 0..100 and scored as
// 100-cpu. The result is 60% memory score plus 40% CPU score, using integer
// division.
func calcServerWeight(freeMem int, cpu int) int {
	const memCap = 8192 // 8GB as reference cap for normalisation

	memScore := 100
	switch {
	case freeMem <= 0:
		memScore = 0
	case freeMem < memCap:
		memScore = freeMem * 100 / memCap
	}

	switch {
	case cpu < 0:
		cpu = 0
	case cpu > 100:
		cpu = 100
	}
	cpuScore := 100 - cpu

	// Both scores are already within 0..100, so the weighted average is too.
	return (memScore*6 + cpuScore*4) / 10
}

// markServerOffline sets Status=0 for the given server in the server table.
func markServerOffline(appID int, serverID int) {
	tmpDb := util.MPool.GetGameDB()
	defer tmpDb.Close()

	// Best effort: there is no caller to report to, and the next monitoring
	// round will attempt the update again if the server is still failing.
	_, _ = tmpDb.Exec("update server set Status=0 where AppId=? and ServerId=?", appID, serverID)
}