Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add maxRetryAttempts field to deployment's config #586 #592

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 30 additions & 30 deletions README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,10 @@ public abstract class Deployment extends RoleBasedEntity {
*/
@JsonAlias({"descriptionKeywords", "description_keywords"})
private List<String> descriptionKeywords = List.of();

/**
* Indicates the maximum number of retry attempts made when routing a single user request.
*/
@JsonAlias({"maxRetryAttempts", "max_retry_attempts"})
private int maxRetryAttempts = 1;
}
4 changes: 4 additions & 0 deletions config/src/main/java/com/epam/aidial/core/config/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ public class Model extends Deployment {
private List<Upstream> upstreams = List.of();
// if it's set then the model name is overridden with that name in the request body to the model adapter
private String overrideName;

/**
 * Creates a model whose max retry attempts are initialised to 5 instead of
 * the default of 1 declared on {@link Deployment}.
 */
public Model() {
setMaxRetryAttempts(5);
}
}
4 changes: 4 additions & 0 deletions config/src/main/java/com/epam/aidial/core/config/Route.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ public class Route extends RoleBasedEntity {
private List<Pattern> paths = List.of();
private Set<String> methods = Set.of();
private List<Upstream> upstreams = List.of();
/**
* Indicates the maximum number of retry attempts made when routing a single user request.
*/
private int maxRetryAttempts = 1;

@Data
public static class Response {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public ApplicationData mapApplication(Application application) {
String reference = application.getReference();
data.setReference(reference == null ? application.getName() : reference);
data.setFunction(application.getFunction());
data.setMaxRetryAttempts(application.getMaxRetryAttempts());

return data;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ private void handleProxyResponse(HttpClientResponse proxyResponse) {
response.setStatusCode(proxyResponse.statusCode());

ProxyUtil.copyHeaders(proxyResponse.headers(), response.headers());
response.putHeader(Proxy.HEADER_UPSTREAM_ATTEMPTS, Integer.toString(upstreamRoute.used()));
response.putHeader(Proxy.HEADER_UPSTREAM_ATTEMPTS, Integer.toString(upstreamRoute.getAttemptCount()));

responseStream.pipe()
.endOnFailure(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,15 @@
import com.epam.aidial.core.server.data.ResourceTypes;
import com.epam.aidial.core.server.security.AccessService;
import com.epam.aidial.core.server.service.ApplicationService;
import com.epam.aidial.core.server.service.InvitationService;
import com.epam.aidial.core.server.service.PermissionDeniedException;
import com.epam.aidial.core.server.service.ResourceNotFoundException;
import com.epam.aidial.core.server.service.ShareService;
import com.epam.aidial.core.server.util.ProxyUtil;
import com.epam.aidial.core.server.util.ResourceDescriptorFactory;
import com.epam.aidial.core.storage.data.MetadataBase;
import com.epam.aidial.core.storage.data.ResourceItemMetadata;
import com.epam.aidial.core.storage.http.HttpException;
import com.epam.aidial.core.storage.http.HttpStatus;
import com.epam.aidial.core.storage.resource.ResourceDescriptor;
import com.epam.aidial.core.storage.service.LockService;
import com.epam.aidial.core.storage.service.ResourceService;
import com.epam.aidial.core.storage.util.EtagHeader;
import io.vertx.core.Future;
Expand All @@ -38,10 +35,7 @@ public class ResourceController extends AccessControlBaseController {

private final Vertx vertx;
private final ResourceService service;
private final ShareService shareService;
private final LockService lockService;
private final ApplicationService applicationService;
private final InvitationService invitationService;
private final boolean metadata;
private final AccessService accessService;

Expand All @@ -51,10 +45,7 @@ public ResourceController(Proxy proxy, ProxyContext context, boolean metadata) {
this.vertx = proxy.getVertx();
this.service = proxy.getResourceService();
this.applicationService = proxy.getApplicationService();
this.shareService = proxy.getShareService();
this.accessService = proxy.getAccessService();
this.lockService = proxy.getLockService();
this.invitationService = proxy.getInvitationService();
this.metadata = metadata;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ public class DeploymentData {
private Integer maxInputAttachments;
private Map<String, Object> defaults;
private List<String> descriptionKeywords;
private int maxRetryAttempts;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.epam.aidial.core.server.upstream;

import com.epam.aidial.core.config.Deployment;
import com.epam.aidial.core.config.Route;
import com.epam.aidial.core.config.Upstream;
import lombok.Getter;
import lombok.Setter;
Expand All @@ -25,9 +27,16 @@ class TieredBalancer implements LoadBalancer<UpstreamState> {
@Setter
private long lastAccessTime;

public TieredBalancer(String deploymentName, List<Upstream> upstreams) {
/**
* Note: the value is taken from {@link Deployment#getMaxRetryAttempts()} or {@link Route#getMaxRetryAttempts()}.
*/
@Getter
private final int originalMaxRetryAttempts;

public TieredBalancer(String deploymentName, List<Upstream> upstreams, int originalMaxRetryAttempts) {
this.originalUpstreams = upstreams;
this.tiers = buildTiers(deploymentName, upstreams);
this.originalMaxRetryAttempts = originalMaxRetryAttempts;
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.epam.aidial.core.config.Upstream;
import com.epam.aidial.core.storage.http.HttpStatus;
import io.vertx.core.http.HttpClientResponse;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -31,29 +32,26 @@ public class UpstreamRoute {

private final LoadBalancer<UpstreamState> balancer;
/**
* The maximum number of upstreams this route can use due to retries.
* The maximum number of attempts the route may make, counting the initial attempt.
*/
private final int maxUpstreamsToUse;
private final int maxRetryAttempts;

/**
* Current upstream state
*/
@Nullable
private UpstreamState upstreamState;
private int used;
/**
* Attempt counter
*/
@Getter
private int attemptCount;

public UpstreamRoute(LoadBalancer<UpstreamState> balancer, int maxUpstreamsToUse) {
public UpstreamRoute(LoadBalancer<UpstreamState> balancer, int maxRetryAttempts) {
this.balancer = balancer;
this.maxUpstreamsToUse = maxUpstreamsToUse;
this.maxRetryAttempts = maxRetryAttempts;
this.upstreamState = balancer.next();
this.used = upstreamState == null ? 0 : 1;
}

/**
* @return the number of used upstreams.
*/
public int used() {
return used;
this.attemptCount = upstreamState == null ? 0 : 1;
}

/**
Expand All @@ -62,7 +60,7 @@ public int used() {
* @return true if upstream available, false otherwise
*/
public boolean available() {
return upstreamState != null && used <= maxUpstreamsToUse;
return upstreamState != null && attemptCount <= maxRetryAttempts;
}

/**
Expand All @@ -73,11 +71,11 @@ public boolean available() {
@Nullable
public Upstream next() {
// if max attempts reached - do not call balancer
if (used + 1 > maxUpstreamsToUse) {
if (attemptCount + 1 > maxRetryAttempts) {
this.upstreamState = null;
return null;
}
used++;
attemptCount++;
this.upstreamState = balancer.next();
return upstreamState == null ? null : upstreamState.getUpstream();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@
@Slf4j
public class UpstreamRouteProvider {

/**
* Indicated max retry attempts (max upstreams from load balancer) to route a single user request
*/
private static final int MAX_RETRY_COUNT = 5;
/**
* Maximum idle period while balancers will stay in the local cache.
*/
Expand All @@ -44,26 +40,28 @@ public UpstreamRouteProvider(Vertx vertx) {
public UpstreamRoute get(Deployment deployment) {
String key = getKey(deployment);
List<Upstream> upstreams = getUpstreams(deployment);
return get(key, upstreams);
return get(key, upstreams, deployment.getMaxRetryAttempts());
}

public UpstreamRoute get(Route route) {
String key = getKey(route);
return get(key, route.getUpstreams());
return get(key, route.getUpstreams(), route.getMaxRetryAttempts());
}

private UpstreamRoute get(String key, List<Upstream> upstreams) {
private UpstreamRoute get(String key, List<Upstream> upstreams, int maxRetryAttempts) {
TieredBalancer balancer = balancers.compute(key, (k, cur) -> {
TieredBalancer result;
if (cur != null && isUpstreamsTheSame(cur.getOriginalUpstreams(), upstreams)) {
if (cur != null && isUpstreamsTheSame(cur.getOriginalUpstreams(), upstreams)
&& maxRetryAttempts == cur.getOriginalMaxRetryAttempts()) {
result = cur;
} else {
result = new TieredBalancer(key, upstreams);
result = new TieredBalancer(key, upstreams, maxRetryAttempts);
}
result.setLastAccessTime(System.currentTimeMillis());
return result;
});
return new UpstreamRoute(balancer, MAX_RETRY_COUNT);
int result = Math.min(maxRetryAttempts, upstreams.size());
return new UpstreamRoute(balancer, result);
}

private List<Upstream> getUpstreams(Deployment deployment) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ void testApplicationStarted() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -117,6 +118,7 @@ void testApplicationStarted() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -168,6 +170,7 @@ void testApplicationStopped() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -199,6 +202,7 @@ void testApplicationStopped() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -250,6 +254,7 @@ void testApplicationFailed() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -281,6 +286,7 @@ void testApplicationFailed() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -329,6 +335,7 @@ void testRecoverApplicationAfterFailedStart() throws Exception {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -384,6 +391,7 @@ void testRecoverApplicationAfterFailedStop() throws Exception {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -582,6 +590,7 @@ void testControllerError() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime": "python3.11",
Expand Down Expand Up @@ -659,6 +668,7 @@ void testPublication() {
"defaults" : { },
"interceptors" : [ ],
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0127",
"runtime": "python3.11",
Expand Down Expand Up @@ -772,6 +782,7 @@ void testOpenAiApi() {
},
"defaults" : { },
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime" : "python3.11",
Expand Down Expand Up @@ -819,7 +830,8 @@ void testOpenAiApi() {
"content_parts_supported": false
},
"defaults" : { },
"description_keywords" : [ ]
"description_keywords" : [ ],
"max_retry_attempts" : 1
}, {
"id" : "applications/3CcedGxCx23EwiVbVmscVktScRyf46KypuBQ65miviST/my-app",
"application" : "applications/3CcedGxCx23EwiVbVmscVktScRyf46KypuBQ65miviST/my-app",
Expand Down Expand Up @@ -849,6 +861,7 @@ void testOpenAiApi() {
},
"defaults" : { },
"description_keywords" : [ ],
"max_retry_attempts" : 1,
"function" : {
"id" : "0123",
"runtime" : "python3.11",
Expand Down
Loading
Loading