mattermost/server/channels/wsapi/websocket_handler.go

85 lines
2.5 KiB
Go
Raw Permalink Normal View History

// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
package wsapi
import (
2017-09-06 18:12:54 -04:00
"net/http"
"github.com/mattermost/mattermost/server/public/model"
"github.com/mattermost/mattermost/server/public/shared/i18n"
"github.com/mattermost/mattermost/server/public/shared/mlog"
"github.com/mattermost/mattermost/server/v8/channels/app"
"github.com/mattermost/mattermost/server/v8/channels/app/platform"
)
// APIWebSocketHandler wraps wh in a webSocketHandler that is bound to api.App,
// so the returned value can serve websocket requests by delegating to wh.
func (api *API) APIWebSocketHandler(wh func(*model.WebSocketRequest) (map[string]any, *model.AppError)) webSocketHandler {
	handler := webSocketHandler{
		app:         api.App,
		handlerFunc: wh,
	}
	return handler
}
// webSocketHandler pairs the application with the function that services a
// single websocket request.
type webSocketHandler struct {
// app is used by ServeWebSocket to look up the user's hub and the session.
app *app.App
// handlerFunc produces the response data, or an AppError, for a request.
handlerFunc func(*model.WebSocketRequest) (map[string]any, *model.AppError)
}
func (wh webSocketHandler) ServeWebSocket(conn *platform.WebConn, r *model.WebSocketRequest) {
// Don't log ping requests to reduce log noise
if r.Action != "ping" {
mlog.Debug("Websocket request", mlog.String("action", r.Action))
}
hub := wh.app.Srv().Platform().GetHubForUserId(conn.UserId)
MM-23805: Refactor web_hub (#14277) * MM-23800: remove goroutineID and stack printing Each hub has a goroutineID which is calculated with a known hack. The FAQ clearly explains why goroutines don't have an id: https://golang.org/doc/faq#no_goroutine_id. We only added that because sometimes the hub would be deadlocked and having the goroutineID would be useful when getting the stack trace. This is also problematic in stress tests because the hubs would frequently get overloaded and the logs would unnecessarily have stack traces. But that was in the past, and we have done extensive testing with load tests and fuzz testing to smooth any rough edges remaining. Including adding additional metrics for hub buffer size. Monitoring the metrics is a better way to approach this problem. Therefore, we remove these kludges from the code. * Also remove deadlock checking code There is no need for that anymore since we are getting rid of the stack printing anyways. Let's do a wholesale refactor and clean up the codebase. * MM-23805: Refactor web_hub This is a beginning of the refactoring of the websocket code. To start off with, we unexport some methods and constants which did not need to be exported. There are more remaining but some are out of scope for this PR. The main chunk of refactor is to unexport the webconn send channel which was the main cause of panics. Since we were directly sending to the connection from various parts of the codebase, it would be possible that the send channel would be closed and we could still send a message. This would crash the server. To fix this, we refactor the code to centralize all sending from the main hub goroutine. This means we can leverage the connections map to check if the connection exists or not, and only then send the message. We also move the cluster calls to cluster.go. 
* bring back cluster code inside hub * Incorporate review comments * Address review comments * rename index * MM-23807: Refactor web_conn - Unexport some struct fields and constants which are not necessary to be accessed from outside the package. This will help us moving the entire websocket handling code to a separate package later. - Change some empty string checks to check for empty string rather than doing a len check which is more idiomatic. Both of them compile to the same code. So it doesn't make a difference performance-wise. - Remove redundant ToJson calls to get the length. - Incorporate review comments - Unexport some more methods * Fix field name * Run make app-layers * Add note on hub check
2020-04-23 03:46:18 -04:00
if hub == nil {
return
}
session, sessionErr := wh.app.GetSession(conn.GetSessionToken())
if sessionErr != nil {
mlog.Error(
"websocket session error",
mlog.String("action", r.Action),
mlog.Int("seq", r.Seq),
mlog.String("user_id", conn.UserId),
mlog.String("error_message", sessionErr.SystemMessage(i18n.T)),
mlog.Err(sessionErr),
)
sessionErr.WipeDetailed()
errResp := model.NewWebSocketError(r.Seq, sessionErr)
MM-23805: Refactor web_hub (#14277) * MM-23800: remove goroutineID and stack printing Each hub has a goroutineID which is calculated with a known hack. The FAQ clearly explains why goroutines don't have an id: https://golang.org/doc/faq#no_goroutine_id. We only added that because sometimes the hub would be deadlocked and having the goroutineID would be useful when getting the stack trace. This is also problematic in stress tests because the hubs would frequently get overloaded and the logs would unnecessarily have stack traces. But that was in the past, and we have done extensive testing with load tests and fuzz testing to smooth any rough edges remaining. Including adding additional metrics for hub buffer size. Monitoring the metrics is a better way to approach this problem. Therefore, we remove these kludges from the code. * Also remove deadlock checking code There is no need for that anymore since we are getting rid of the stack printing anyways. Let's do a wholesale refactor and clean up the codebase. * MM-23805: Refactor web_hub This is a beginning of the refactoring of the websocket code. To start off with, we unexport some methods and constants which did not need to be exported. There are more remaining but some are out of scope for this PR. The main chunk of refactor is to unexport the webconn send channel which was the main cause of panics. Since we were directly sending to the connection from various parts of the codebase, it would be possible that the send channel would be closed and we could still send a message. This would crash the server. To fix this, we refactor the code to centralize all sending from the main hub goroutine. This means we can leverage the connections map to check if the connection exists or not, and only then send the message. We also move the cluster calls to cluster.go. 
* bring back cluster code inside hub * Incorporate review comments * Address review comments * rename index * MM-23807: Refactor web_conn - Unexport some struct fields and constants which are not necessary to be accessed from outside the package. This will help us moving the entire websocket handling code to a separate package later. - Change some empty string checks to check for empty string rather than doing a len check which is more idiomatic. Both of them compile to the same code. So it doesn't make a difference performance-wise. - Remove redundant ToJson calls to get the length. - Incorporate review comments - Unexport some more methods * Fix field name * Run make app-layers * Add note on hub check
2020-04-23 03:46:18 -04:00
hub.SendMessage(conn, errResp)
return
}
r.Session = *session
r.T = conn.T
r.Locale = conn.Locale
var data map[string]any
var err *model.AppError
if data, err = wh.handlerFunc(r); err != nil {
mlog.Error(
"websocket request handling error",
mlog.String("action", r.Action),
mlog.Int("seq", r.Seq),
mlog.String("user_id", conn.UserId),
mlog.String("error_message", err.SystemMessage(i18n.T)),
mlog.Err(err),
)
err.WipeDetailed()
Merging performance branch into master (#4268) * improve performance on sendNotifications * Fix SQL queries * Remove get direct profiles, not needed anymore * Add raw data to error details if AppError fails to decode * men * Fix decode (#4052) * Fixing json decode * Adding unit test * Initial work for client scaling (#4051) * Begin adding paging to profiles API * Added more paging functionality * Finish hooking up admin console user lists * Add API for searching users and add searching to all user lists * Add lazy loading of profiles * Revert config.json * Fix unit tests and some style issues * Add GetProfilesFromList to Go driver and fix web unit test * Update etag for GetProfiles * Updating ui for filters and pagination (#4044) * Updating UI for pagination * Adjusting margins for filter row * Adjusting margin for specific modals * Adding relative padding to system console * Adjusting responsive view * Update client user tests * Minor fixes for direct messages modal (#4056) * Remove some unneeded initial load calls (#4057) * UX updates to user lists, added smart counts and bug fixes (#4059) * Improved getExplicitMentions and unit tests (#4064) * Refactor getting posts to lazy load profiles correctly (#4062) * Comment out SetActiveChannel test (#4066) * Profiler cpu, block, and memory profiler. 
(#4081) * Fix TestSetActiveChannel unit test (#4071) * Fixing build failure caused by dependancies updating (#4076) * Adding profiler * Fix admin_team_member_dropdown eslint errors * Bumping session cache size (#4077) * Bumping session cache size * Bumping status cache * Refactor how the client handles channel members to be large team friendly (#4106) * Refactor how the client handles channel members to be large team friendly * Change Id to ChannelId in ChannelStats model * Updated getChannelMember and getProfilesByIds routes to match proposal * Performance improvements (#4100) * Performance improvements * Fixing re-connect issue * Fixing error message * Some other minor perf tweaks * Some other minor perf tweaks * Fixing config file * Fixing buffer size * Fixing web socket send message * adding some error logging * fix getMe to be user required * Fix websocket event for new user * Fixing shutting down * Reverting web socket changes * Fixing logging lvl * Adding caching to GetMember * Adding some logging * Fixing caching * Fixing caching invalidate * Fixing direct message caching * Fixing caching * Fixing caching * Remove GetDirectProfiles from initial load * Adding logging and fixing websocket client * Adding back caching from bad merge. 
* Explicitly close go driver requests (#4162) * Refactored how the client handles team members to be more large team friendly (#4159) * Refactor getProfilesForDirectMessageList API into getAllProfiles API * Refactored how the client handles team members to be more large team friendly * Fix js error when receiving a notification * Fix JS error caused by current user being overwritten with sanitized version (#4165) * Adding error message to status failure (#4167) * Fix a few bugs caused by client scaling refactoring (#4170) * When there is no read replica, don't open a second set of connections to the master database (#4173) * Adding connection tacking to stats (#4174) * Reduce DB writes for statuses and other status related changes (#4175) * Fix bug preventing opening of DM channels from more modal (#4181) * Fixing socket timing error (#4183) * Fixing ping/pong handler * Fixing socket timing error * Commenting out status broadcasting * Removing user status changes * Removing user status changes * Removing user status changes * Removing user status changes * Adding DoPreComputeJson() * Performance improvements (#4194) * * Fix System Console Analytics queries * Add db.SetConnMaxLifetime to 15 minutes * Add "net/http/pprof" for profiling * Add FreeOSMemory() to manually release memory on reload config * Add flag to enable http profiler * Fix memory leak (#4197) * Fix memory leak * removed unneeded nil assignment * Fixing go routine leak (#4208) * Merge fixes * Merge fix * Refactored statuses to be queried by the client rather than broadcast by the server (#4212) * Refactored server code to reduce status broadcasts and to allow getting statuses by IDs * Refactor client code to periodically fetch statuses * Add store unit test for getting statuses by ids * Fix status unit test * Add getStatusesByIds REST API and move the client over to use that instead of the WebSocket * Adding multiple threads to websocket hub (#4230) * Adding multiple threads to websocket hub * Fixing 
unit tests * Fixing so websocket connections from the same user end up in the same… (#4240) * Fixing so websocket connections from the same user end up in the same list * Removing old comment * Refactor user autocomplete to query the server (#4239) * Add API for autocompleting users * Converted at mention autocomplete to query server * Converted user search autocomplete to query server * Switch autocomplete API naming to use term instead of username * Split autocomplete API into two, one for channels and for teams * Fix copy/paste error * Some final client scaling fixes (#4246) * Add lazy loading of profiles to integration pages * Add lazy loading of profiles to emoji page * Fix JS error when receiving post in select team menu and also clean up channel store
2016-10-19 14:49:25 -04:00
errResp := model.NewWebSocketError(r.Seq, err)
MM-23805: Refactor web_hub (#14277) * MM-23800: remove goroutineID and stack printing Each hub has a goroutineID which is calculated with a known hack. The FAQ clearly explains why goroutines don't have an id: https://golang.org/doc/faq#no_goroutine_id. We only added that because sometimes the hub would be deadlocked and having the goroutineID would be useful when getting the stack trace. This is also problematic in stress tests because the hubs would frequently get overloaded and the logs would unnecessarily have stack traces. But that was in the past, and we have done extensive testing with load tests and fuzz testing to smooth any rough edges remaining. Including adding additional metrics for hub buffer size. Monitoring the metrics is a better way to approach this problem. Therefore, we remove these kludges from the code. * Also remove deadlock checking code There is no need for that anymore since we are getting rid of the stack printing anyways. Let's do a wholesale refactor and clean up the codebase. * MM-23805: Refactor web_hub This is a beginning of the refactoring of the websocket code. To start off with, we unexport some methods and constants which did not need to be exported. There are more remaining but some are out of scope for this PR. The main chunk of refactor is to unexport the webconn send channel which was the main cause of panics. Since we were directly sending to the connection from various parts of the codebase, it would be possible that the send channel would be closed and we could still send a message. This would crash the server. To fix this, we refactor the code to centralize all sending from the main hub goroutine. This means we can leverage the connections map to check if the connection exists or not, and only then send the message. We also move the cluster calls to cluster.go. 
* bring back cluster code inside hub * Incorporate review comments * Address review comments * rename index * MM-23807: Refactor web_conn - Unexport some struct fields and constants which are not necessary to be accessed from outside the package. This will help us moving the entire websocket handling code to a separate package later. - Change some empty string checks to check for empty string rather than doing a len check which is more idiomatic. Both of them compile to the same code. So it doesn't make a difference performance-wise. - Remove redundant ToJson calls to get the length. - Incorporate review comments - Unexport some more methods * Fix field name * Run make app-layers * Add note on hub check
2020-04-23 03:46:18 -04:00
hub.SendMessage(conn, errResp)
return
}
2021-07-12 14:05:36 -04:00
resp := model.NewWebSocketResponse(model.StatusOk, r.Seq, data)
MM-23805: Refactor web_hub (#14277) * MM-23800: remove goroutineID and stack printing Each hub has a goroutineID which is calculated with a known hack. The FAQ clearly explains why goroutines don't have an id: https://golang.org/doc/faq#no_goroutine_id. We only added that because sometimes the hub would be deadlocked and having the goroutineID would be useful when getting the stack trace. This is also problematic in stress tests because the hubs would frequently get overloaded and the logs would unnecessarily have stack traces. But that was in the past, and we have done extensive testing with load tests and fuzz testing to smooth any rough edges remaining. Including adding additional metrics for hub buffer size. Monitoring the metrics is a better way to approach this problem. Therefore, we remove these kludges from the code. * Also remove deadlock checking code There is no need for that anymore since we are getting rid of the stack printing anyways. Let's do a wholesale refactor and clean up the codebase. * MM-23805: Refactor web_hub This is a beginning of the refactoring of the websocket code. To start off with, we unexport some methods and constants which did not need to be exported. There are more remaining but some are out of scope for this PR. The main chunk of refactor is to unexport the webconn send channel which was the main cause of panics. Since we were directly sending to the connection from various parts of the codebase, it would be possible that the send channel would be closed and we could still send a message. This would crash the server. To fix this, we refactor the code to centralize all sending from the main hub goroutine. This means we can leverage the connections map to check if the connection exists or not, and only then send the message. We also move the cluster calls to cluster.go. 
* bring back cluster code inside hub * Incorporate review comments * Address review comments * rename index * MM-23807: Refactor web_conn - Unexport some struct fields and constants which are not necessary to be accessed from outside the package. This will help us moving the entire websocket handling code to a separate package later. - Change some empty string checks to check for empty string rather than doing a len check which is more idiomatic. Both of them compile to the same code. So it doesn't make a difference performance-wise. - Remove redundant ToJson calls to get the length. - Incorporate review comments - Unexport some more methods * Fix field name * Run make app-layers * Add note on hub check
2020-04-23 03:46:18 -04:00
hub.SendMessage(conn, resp)
}
// NewInvalidWebSocketParamError builds the AppError used when the websocket
// action named by action is given an invalid parameter. name is supplied as
// the "Name" translation parameter and the status code is
// http.StatusBadRequest.
func NewInvalidWebSocketParamError(action string, name string) *model.AppError {
	where := "websocket: " + action
	params := map[string]any{"Name": name}
	return model.NewAppError(where, "api.websocket_handler.invalid_param.app_error", params, "", http.StatusBadRequest)
}
// NewServerBusyWebSocketError builds the AppError for the websocket action
// named by action using the server_busy message id, with status code
// http.StatusServiceUnavailable.
func NewServerBusyWebSocketError(action string) *model.AppError {
	const id = "api.websocket_handler.server_busy.app_error"
	where := "websocket: " + action
	return model.NewAppError(where, id, nil, "", http.StatusServiceUnavailable)
}